From 801b481beb5b235eaf9f390155422b9db733fd40 Mon Sep 17 00:00:00 2001
From: Chakib Benziane
Date: Sun, 17 Mar 2024 09:43:20 +0100
Subject: [PATCH 0001/2750] fixes #1051: handle openai presence and frequency
penalty parameters (#1817)
* fix request debugging, disable marshalling of context fields
Signed-off-by: blob42
* merge frequency_penalty request param with config
Signed-off-by: blob42
* openai: add presence_penalty parameter
Signed-off-by: blob42
---------
Signed-off-by: blob42
---
core/http/endpoints/openai/request.go | 8 ++++++++
core/schema/openai.go | 2 +-
core/schema/prediction.go | 1 +
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index 505244c4..1f845c6f 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -185,6 +185,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
config.RepeatPenalty = input.RepeatPenalty
}
+ if input.FrequencyPenalty != 0 {
+ config.FrequencyPenalty = input.FrequencyPenalty
+ }
+
+ if input.PresencePenalty != 0 {
+ config.PresencePenalty = input.PresencePenalty
+ }
+
if input.Keep != 0 {
config.Keep = input.Keep
}
diff --git a/core/schema/openai.go b/core/schema/openai.go
index 1c13847c..6aa0f1b0 100644
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
type OpenAIRequest struct {
PredictionOptions
- Context context.Context `json:"-"`
+ Context context.Context `json:"-"`
Cancel context.CancelFunc `json:"-"`
// whisper
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index d75e5eb8..4933f2d2 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -25,6 +25,7 @@ type PredictionOptions struct {
Keep int `json:"n_keep" yaml:"n_keep"`
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
+ PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
TFZ float64 `json:"tfz" yaml:"tfz"`
TypicalP float64 `json:"typical_p" yaml:"typical_p"`
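With this change, a non-zero frequency_penalty or presence_penalty in the request body overrides the value from the backend config, while zero leaves the configured default untouched. A minimal client-side sketch exercising both fields against a local instance; the model name is a placeholder, not part of this patch:

    package main

    import (
        "bytes"
        "encoding/json"
        "fmt"
        "net/http"
    )

    func main() {
        // Non-zero penalties override the backend config defaults.
        payload := map[string]any{
            "model":             "my-model", // placeholder
            "messages":          []map[string]string{{"role": "user", "content": "Hello"}},
            "frequency_penalty": 0.5,
            "presence_penalty":  0.6,
        }
        body, _ := json.Marshal(payload)
        resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }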
From 020ce29cd84fa34de359c51dc6a824b1a86a7d02 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Sun, 17 Mar 2024 09:39:20 -0500
Subject: [PATCH 0002/2750] fix(make): allow to parallelize jobs (#1845)
* fix: clean up Makefile dependencies to allow for parallel builds
* refactor: remove old unused backend from Makefile
* fix: finish removing legacy backend, update piper
* fix: I broke llama... I fixed llama
* feat: give the tests and builds a few threads
* fix: ensure libraries are replaced before build, add dropreplace target
* Fix image build workflows
---
.dockerignore | 2 +-
.github/workflows/image-pr.yml | 2 +
.github/workflows/image.yml | 2 +
.github/workflows/image_build.yml | 6 +
Dockerfile | 39 +++---
Makefile | 222 ++++++++++++++----------------
backend/cpp/grpc/Makefile | 2 +-
backend/cpp/llama/Makefile | 2 +-
8 files changed, 139 insertions(+), 138 deletions(-)
diff --git a/.dockerignore b/.dockerignore
index 979a26a3..97e8aa34 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,4 +3,4 @@ models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models
-Dockerfile
\ No newline at end of file
+Dockerfile*
\ No newline at end of file
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 2e9a0afe..17456617 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,6 +22,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ makeflags: "-j3"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -80,6 +81,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ makeflags: "-j3"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 2a7fac27..5ba0f1bf 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -26,6 +26,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ makeflags: "-j3"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -199,6 +200,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ makeflags: "-j3"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index a45473b4..a978f1bf 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -46,6 +46,11 @@ on:
required: true
default: ''
type: string
+ makeflags:
+ description: 'Make Flags'
+ required: false
+ default: ''
+ type: string
secrets:
dockerUsername:
required: true
@@ -160,6 +165,7 @@ jobs:
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
+ MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
platforms: ${{ inputs.platforms }}
diff --git a/Dockerfile b/Dockerfile
index d9354e6d..ebda80ba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,7 +63,9 @@ WORKDIR /build
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-# Extras requirements
+###################################
+###################################
+
FROM requirements-core as requirements-extras
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
@@ -93,8 +95,11 @@ FROM requirements-${IMAGE_TYPE} as builder
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
ARG BUILD_GRPC=true
+ARG MAKEFLAGS
+
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
+ENV MAKEFLAGS=${MAKEFLAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
@@ -116,10 +121,10 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
- git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+ git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
- ../.. && make -j12 install \
+ ../.. && make install \
; fi
# Rebuild with defaults backends
@@ -139,10 +144,12 @@ ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG IMAGE_TYPE=extras
+ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
+ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
@@ -186,43 +193,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/
## Duplicated from Makefile to avoid having a big layer that's hard to push
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/autogptq \
+ make -C backend/python/autogptq \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/bark \
+ make -C backend/python/bark \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/diffusers \
+ make -C backend/python/diffusers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/vllm \
+ make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/mamba \
+ make -C backend/python/mamba \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/sentencetransformers \
+ make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/transformers \
+ make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/vall-e-x \
+ make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/exllama \
+ make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/exllama2 \
+ make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/petals \
+ make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/transformers-musicgen \
+ make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
- make -C backend/python/coqui \
+ make -C backend/python/coqui \
; fi
# Make sure the models directory exists
diff --git a/Makefile b/Makefile
index 2bd83fc1..4449e501 100644
--- a/Makefile
+++ b/Makefile
@@ -4,10 +4,7 @@ GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# llama.cpp versions
-GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c
-
-GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
-
+GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=d84c48505f60bcd358b82a751d40418c4d235643
# gpt4all version
@@ -148,7 +145,6 @@ endif
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
@@ -168,40 +164,41 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
-.PHONY: all test build vendor
+.PHONY: all test build vendor get-sources prepare-sources prepare
all: help
-## GPT4ALL
-sources/gpt4all:
- git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
- cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
-
-## go-piper
-sources/go-piper:
- git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
- cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
-
## BERT embeddings
sources/go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
-## stable diffusion
-sources/go-stable-diffusion:
- git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
- cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert/libgobert.a: sources/go-bert
+ $(MAKE) -C sources/go-bert libgobert.a
-sources/go-stable-diffusion/libstablediffusion.a:
- $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+## go-llama-ggml
+sources/go-llama-ggml:
+ git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
+ cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
-## tiny-dream
-sources/go-tiny-dream:
- git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
- cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
+ $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-sources/go-tiny-dream/libtinydream.a:
- $(MAKE) -C sources/go-tiny-dream libtinydream.a
+## go-piper
+sources/go-piper:
+ git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
+ cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-piper/libpiper_binding.a: sources/go-piper
+ $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
+
+## GPT4ALL
+sources/gpt4all:
+ git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
+ cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
+ $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv:
@@ -211,23 +208,23 @@ sources/go-rwkv:
sources/go-rwkv/librwkv.a: sources/go-rwkv
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
-sources/go-bert/libgobert.a: sources/go-bert
- $(MAKE) -C sources/go-bert libgobert.a
+## stable diffusion
+sources/go-stable-diffusion:
+ git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
+ cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
-backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
- mkdir -p backend-assets/gpt4all
- @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
- @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
- @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
+sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
+ $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
-backend-assets/espeak-ng-data: sources/go-piper
- mkdir -p backend-assets/espeak-ng-data
- $(MAKE) -C sources/go-piper piper.o
- @cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
+## tiny-dream
+sources/go-tiny-dream:
+ git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
+ cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
-sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
- $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
+sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
+ $(MAKE) -C sources/go-tiny-dream libtinydream.a
+## whisper
sources/whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@@ -235,47 +232,34 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && make libwhisper.a
-sources/go-llama:
- git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
- cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-llama-ggml:
- git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
- cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-llama/libbinding.a: sources/go-llama
- $(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
-
-sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
- $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
-sources/go-piper/libpiper_binding.a: sources/go-piper
- $(MAKE) -C sources/go-piper libpiper_binding.a example/main
-
-backend/cpp/llama/llama.cpp:
- LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
-
-get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
- touch $@
+get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
replace:
- $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
- $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
+ $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
+ $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
+
+dropreplace:
+ $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
+ $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
+ $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
+ $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
+ $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
+ $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
+ $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
+ $(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
prepare-sources: get-sources replace
$(GOCMD) mod download
- touch $@
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
- $(MAKE) -C sources/go-llama clean
$(MAKE) -C sources/go-llama-ggml clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv clean
@@ -287,7 +271,6 @@ rebuild: ## Rebuilds the project
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
- touch $@
clean: ## Remove build related file
$(GOCMD) clean -cache
@@ -298,10 +281,10 @@ clean: ## Remove build related file
rm -rf backend-assets
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/cpp/llama clean
+ $(MAKE) dropreplace
## Build:
-
-build: backend-assets grpcs prepare ## Build the project
+build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -454,39 +437,55 @@ ifeq ($(BUILD_API_ONLY),true)
touch backend-assets/keep
endif
-backend-assets/grpc:
+backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
+ mkdir -p backend-assets/espeak-ng-data
+ @cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
+
+backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
+ mkdir -p backend-assets/gpt4all
+ @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
+ @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
+ @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
+
+backend-assets/grpc: replace
mkdir -p backend-assets/grpc
-backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
- $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
-# TODO: every binary should have its own folder instead, so can have different implementations
+backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+
+backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
+
+backend-assets/grpc/langchain-huggingface: backend-assets/grpc
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
+
+backend/cpp/llama/llama.cpp:
+ LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
-## BACKEND CPP LLAMA START
-# Sets the variables in case it has to build the gRPC locally.
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
- -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
- -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
- -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
- -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-
+ -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
+ -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
+ -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
+ -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
backend/cpp/llama/grpc-server:
+# Conditionally build grpc for the llama backend to use if needed
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
$(MAKE) -C backend/cpp/grpc build
- export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
- export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
- export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
- CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+ _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
+ _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
+ PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
+ CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
+ LLAMA_VERSION=$(CPPLLAMA_VERSION) \
+ $(MAKE) -C backend/cpp/llama grpc-server
else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
endif
-## BACKEND CPP LLAMA END
-##
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
# TODO: every binary should have its own folder instead, so can have different metal implementations
@@ -494,43 +493,28 @@ ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
-backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
+backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
-backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
-
-backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
-
-backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
-
-backend-assets/grpc/langchain-huggingface: backend-assets/grpc
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
-
-backend-assets/grpc/stablediffusion: backend-assets/grpc
- if [ ! -f backend-assets/grpc/stablediffusion ]; then \
- $(MAKE) sources/go-stable-diffusion; \
- $(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
- fi
-
-backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
- CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
-
-backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
+backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
-backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
+backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+
+backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+
+backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+
+backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
diff --git a/backend/cpp/grpc/Makefile b/backend/cpp/grpc/Makefile
index a6486113..6a181794 100644
--- a/backend/cpp/grpc/Makefile
+++ b/backend/cpp/grpc/Makefile
@@ -48,7 +48,7 @@ $(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO):
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
- cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)
+ cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index f2e17a9d..8502ae2f 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -36,7 +36,7 @@ llama.cpp:
fi
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
-llama.cpp/examples/grpc-server:
+llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
From 88b65f63d0afc8b51e26fe6feec65ce9d1cbccc8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 17 Mar 2024 23:08:22 +0100
Subject: [PATCH 0003/2750] fix(go-llama): use llama-cpp as default (#1849)
* fix(go-llama): use llama-cpp as default
Signed-off-by: Ettore Di Giacinto
* fix(backends): drop obsoleted lines
---------
Signed-off-by: Ettore Di Giacinto
---
pkg/model/initializers.go | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 1e2af8f9..a6a84fd7 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -15,12 +15,11 @@ import (
)
var Aliases map[string]string = map[string]string{
- "go-llama": GoLlamaBackend,
+ "go-llama": LLamaCPP,
"llama": LLamaCPP,
}
const (
- GoLlamaBackend = "llama"
LlamaGGML = "llama-ggml"
LLamaCPP = "llama-cpp"
Gpt4AllLlamaBackend = "gpt4all-llama"
@@ -35,15 +34,11 @@ const (
TinyDreamBackend = "tinydream"
PiperBackend = "piper"
LCHuggingFaceBackend = "langchain-huggingface"
-
- // External Backends that need special handling within LocalAI:
- TransformersMusicGen = "transformers-musicgen"
)
var AutoLoadBackends []string = []string{
LLamaCPP,
LlamaGGML,
- GoLlamaBackend,
Gpt4All,
BertEmbeddingsBackend,
RwkvBackend,
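Both the legacy "go-llama" name and "llama" now resolve to the llama-cpp backend before autoloading. A minimal sketch of the alias lookup; resolveBackend is a hypothetical helper, the real resolution lives in pkg/model:

    package main

    import "fmt"

    const LLamaCPP = "llama-cpp"

    // aliases mirrors the patched map: legacy names point at llama-cpp.
    var aliases = map[string]string{
        "go-llama": LLamaCPP,
        "llama":    LLamaCPP,
    }

    // resolveBackend rewrites known aliases and passes anything else through.
    func resolveBackend(name string) string {
        if real, ok := aliases[name]; ok {
            return real
        }
        return name
    }

    func main() {
        fmt.Println(resolveBackend("go-llama")) // llama-cpp
        fmt.Println(resolveBackend("whisper"))  // whisper
    }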
From d2b83d8357f6de6aa5512130785165d24ad92b32 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 17 Mar 2024 23:08:32 +0100
Subject: [PATCH 0004/2750] :arrow_up: Update docs version mudler/LocalAI
(#1847)
Signed-off-by: GitHub
Co-authored-by: mudler
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 59fd693c..cd7dbcda 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.9.0"
+ "version": "v2.10.0"
}
From 0eb0ac7dd0cde42f789f9c4b8da4fe999f58555f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 18 Mar 2024 08:57:58 +0100
Subject: [PATCH 0005/2750] :arrow_up: Update ggerganov/llama.cpp (#1848)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 4449e501..f7ce5218 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d84c48505f60bcd358b82a751d40418c4d235643
+CPPLLAMA_VERSION?=d01b3c4c32357567f3531d4e6ceffc5d23e87583
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From b202bfaaa02fecbe0d2f9c9555c2c5e8eaa86ca8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 18 Mar 2024 15:56:53 +0100
Subject: [PATCH 0006/2750] deps(whisper.cpp): update, fix cublas build (#1846)
fix(whisper.cpp): Add stubs and -lcuda
---
Makefile | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index f7ce5218..8bbc0625 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346
+WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -35,6 +35,7 @@ export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
CGO_LDFLAGS?=
+CGO_LDFLAGS_WHISPER?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=git
@@ -88,10 +89,12 @@ ifeq ($(BUILD_TYPE),openblas)
export WHISPER_OPENBLAS=1
endif
+
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export LLAMA_CUBLAS=1
export WHISPER_CUBLAS=1
+ CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
endif
ifeq ($(BUILD_TYPE),hipblas)
@@ -515,7 +518,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
+ CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
grpcs: prepare $(GRPC_BACKENDS)
From fa9e330fc692e3bee72f724e88d2687dd8ed2cbc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 18 Mar 2024 18:59:24 +0100
Subject: [PATCH 0007/2750] fix(llama.cpp): fix eos without cache (#1852)
---
backend/cpp/llama/grpc-server.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index c91ce854..a2e39a9c 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -1084,7 +1084,7 @@ struct llama_server_context
slot.has_next_token = false;
}
- if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+ if (result.tok == llama_token_eos(model))
{
slot.stopped_eos = true;
slot.has_next_token = false;
From 843f93e1ab0378dda67b77cb9c80746aba70b4c5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 18 Mar 2024 18:59:39 +0100
Subject: [PATCH 0008/2750] fix(config): default to debug=false if not set
(#1853)
---
core/config/backend_config.go | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 2adfeee5..daaf0257 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -276,8 +276,12 @@ func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) {
cfg.F16 = &f16
}
+ if cfg.Debug == nil {
+ cfg.Debug = &falseV
+ }
+
if debug {
- cfg.Debug = &debug
+ cfg.Debug = &trueV
}
}
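The fix relies on the pointer-to-bool convention: a nil Debug means "never set", so the default can be applied without clobbering an explicit value, and the global debug flag can still force it on. A standalone sketch of the same pattern, with illustrative names:

    package main

    import "fmt"

    var (
        trueV  = true
        falseV = false
    )

    type backendConfig struct {
        Debug *bool // nil means the user never set it
    }

    // setDefaults mirrors the patched logic: default Debug to false when
    // unset, then let the global debug flag force it to true.
    func (c *backendConfig) setDefaults(debug bool) {
        if c.Debug == nil {
            c.Debug = &falseV
        }
        if debug {
            c.Debug = &trueV
        }
    }

    func main() {
        c := &backendConfig{}
        c.setDefaults(false)
        fmt.Println(*c.Debug) // false, and never a nil dereference later
    }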
From a046dcac5eb5d647e21cefb92a189dbd0255518f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 18 Mar 2024 19:14:48 +0100
Subject: [PATCH 0009/2750] fix(config-watcher): start only if config-directory
exists (#1854)
Signed-off-by: Ettore Di Giacinto
---
main.go | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/main.go b/main.go
index 169c3400..400dcb57 100644
--- a/main.go
+++ b/main.go
@@ -306,11 +306,16 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
- closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
- defer closeConfigWatcherFn()
+ configdir := ctx.String("localai-config-dir")
+ // Watch the configuration directory
+ // If the directory does not exist, we don't watch it
+ if _, err := os.Stat(configdir); err == nil {
+ closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
+ defer closeConfigWatcherFn()
- if err != nil {
- return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
+ if err != nil {
+ return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
+ }
}
appHTTP, err := http.App(cl, ml, options)
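The guard is a plain os.Stat existence check: the watcher only starts when the directory is actually there. A condensed sketch of the control flow; watchIfPresent is a hypothetical wrapper standing in for the call to startup.WatchConfigDirectory:

    package main

    import (
        "fmt"
        "os"
    )

    // watchIfPresent starts the watcher only when dir exists; a missing
    // directory is not an error, it simply means there is nothing to watch.
    func watchIfPresent(dir string, watch func(string) (func(), error)) (func(), error) {
        if _, err := os.Stat(dir); err != nil {
            return func() {}, nil
        }
        closeFn, err := watch(dir)
        if err != nil {
            return nil, fmt.Errorf("failed while watching configuration directory %s", dir)
        }
        return closeFn, nil
    }

    func main() {
        closeFn, err := watchIfPresent("/tmp/localai-config", func(d string) (func(), error) {
            fmt.Println("watching", d)
            return func() {}, nil
        })
        if err == nil {
            defer closeFn()
        }
    }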
From ed5734ae25edadb631e9de58d1f10f9c50e18c00 Mon Sep 17 00:00:00 2001
From: Dave
Date: Mon, 18 Mar 2024 14:19:43 -0400
Subject: [PATCH 0010/2750] test/fix: OSX Test Repair (#1843)
* test with gguf instead of ggml. Updates testPrompt to match? Adds debugging line to Dockerfile that I've found helpful recently.
* fix testPrompt slightly
* Sad Experiment: Test GH runner without metal?
* break apart CGO_LDFLAGS
* switch runner
* upstream llama.cpp disables Metal on Github CI!
* missed a dir from clean-tests
* CGO_LDFLAGS
* tmate failure + NO_ACCELERATE
* whisper.cpp has a metal fix
* do the exact opposite of the name of this branch, but keep it around for unrelated fixes?
* add back newlines
* add tmate to linux for testing
* update fixtures
* timeout for tmate
---
.github/workflows/test.yml | 12 ++++++++++--
Dockerfile | 1 +
Makefile | 19 +++++++++++++++----
backend/cpp/llama/Makefile | 5 +++++
core/http/api_test.go | 8 ++++----
tests/models_fixtures/config.yaml | 4 ++--
tests/models_fixtures/gpt4.yaml | 2 +-
tests/models_fixtures/gpt4_2.yaml | 2 +-
8 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2a2cc6c8..8222508a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,9 +105,13 @@ jobs:
- name: Test
run: |
GO_TAGS="stablediffusion tts" make test
+ - name: Setup tmate session if tests fail
+ if: ${{ failure() }}
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
tests-apple:
- runs-on: macOS-latest
+ runs-on: macOS-14
strategy:
matrix:
go-version: ['1.21.x']
@@ -130,4 +134,8 @@ jobs:
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
- CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
\ No newline at end of file
+ BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
+ - name: Setup tmate session if tests fail
+ if: ${{ failure() }}
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index ebda80ba..b083690e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,6 +108,7 @@ WORKDIR /build
COPY . .
COPY .git .
+RUN echo "GO_TAGS: $GO_TAGS"
RUN make prepare
# If we are building with clblas support, we need the libraries for the builds
diff --git a/Makefile b/Makefile
index 8bbc0625..ff7ec797 100644
--- a/Makefile
+++ b/Makefile
@@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
endif
ifeq ($(OS),Darwin)
- CGO_LDFLAGS += -lcblas -framework Accelerate
+
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
@@ -81,6 +81,12 @@ ifeq ($(OS),Darwin)
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DLLAMA_METAL=OFF
+ export LLAMA_NO_ACCELERATE=1
+ endif
+
+ ifeq ($(BUILD_TYPE),metal)
+# -lcblas removed: it seems to always be listed as a duplicate flag.
+ CGO_LDFLAGS += -framework Accelerate
endif
endif
@@ -286,6 +292,11 @@ clean: ## Remove build related file
$(MAKE) -C backend/cpp/llama clean
$(MAKE) dropreplace
+clean-tests:
+ rm -rf test-models
+ rm -rf test-dir
+ rm -rf core/http/backend-assets
+
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -305,10 +316,10 @@ osx-signed: build
run: prepare ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
-test-models/testmodel:
+test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
- wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
+ wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -320,7 +331,7 @@ prepare-test: grpcs
cp -rf backend-assets core/http
cp tests/models_fixtures/* test-models
-test: prepare test-models/testmodel grpcs
+test: prepare test-models/testmodel.ggml grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion"
$(MAKE) prepare-test
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 8502ae2f..3d31284a 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -19,6 +19,11 @@ else ifeq ($(BUILD_TYPE),clblas)
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
+# But if it's OSX without metal, disable it here
+else ifeq ($(OS),darwin)
+ ifneq ($(BUILD_TYPE),metal)
+ CMAKE_ARGS+=-DLLAMA_METAL=OFF
+ endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
diff --git a/core/http/api_test.go b/core/http/api_test.go
index b0579a19..ca69e8bf 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -666,15 +666,15 @@ var _ = Describe("API test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
})
- It("can generate completions", func() {
- resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
+ It("can generate completions via ggml", func() {
+ resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
- It("can generate chat completions ", func() {
- resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
+ It("can generate chat completions via ggml", func() {
+ resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml
index 749d1699..f61c2a7c 100644
--- a/tests/models_fixtures/config.yaml
+++ b/tests/models_fixtures/config.yaml
@@ -1,6 +1,6 @@
- name: list1
parameters:
- model: testmodel
+ model: testmodel.ggml
top_p: 80
top_k: 0.9
temperature: 0.1
@@ -19,7 +19,7 @@
top_p: 80
top_k: 0.9
temperature: 0.1
- model: testmodel
+ model: testmodel.ggml
context_size: 200
stopwords:
- "HUMAN:"
diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml
index 652a407c..43e77586 100644
--- a/tests/models_fixtures/gpt4.yaml
+++ b/tests/models_fixtures/gpt4.yaml
@@ -1,6 +1,6 @@
name: gpt4all
parameters:
- model: testmodel
+ model: testmodel.ggml
top_p: 80
top_k: 0.9
temperature: 0.1
diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml
index 904693ca..8a211153 100644
--- a/tests/models_fixtures/gpt4_2.yaml
+++ b/tests/models_fixtures/gpt4_2.yaml
@@ -1,6 +1,6 @@
name: gpt4all-2
parameters:
- model: testmodel
+ model: testmodel.ggml
top_p: 80
top_k: 0.9
temperature: 0.1
From 621541a92f9c2ef84336a103046dc2ad2b4ef7e3 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 19 Mar 2024 00:44:23 +0100
Subject: [PATCH 0011/2750] :arrow_up: Update ggerganov/whisper.cpp (#1508)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index ff7ec797..29d49a80 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2
+WHISPER_CPP_VERSION?=e7794a868ffb53f5299125aaaf74fbcad93cd06c
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From b12a2053200e8f772ea970fc7c99ae9d2c727b37 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 19 Mar 2024 00:44:45 +0100
Subject: [PATCH 0012/2750] :arrow_up: Update docs version mudler/LocalAI
(#1856)
Signed-off-by: GitHub
Co-authored-by: mudler
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index cd7dbcda..20ca21c5 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.10.0"
+ "version": "v2.10.1"
}
From ead61bf9d5b6024d2a6a971bbdfd612c8e059aa7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 19 Mar 2024 01:03:17 +0100
Subject: [PATCH 0013/2750] :arrow_up: Update ggerganov/llama.cpp (#1857)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 29d49a80..f4d85d90 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d01b3c4c32357567f3531d4e6ceffc5d23e87583
+CPPLLAMA_VERSION?=2d15886bb092c3b780c676b5cc57ff3337af9c83
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From e4bf51d5bd9ff88164492f4518e557b08d59a18f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 20 Mar 2024 09:05:53 +0100
Subject: [PATCH 0014/2750] :arrow_up: Update ggerganov/llama.cpp (#1864)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index f4d85d90..fe074592 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2d15886bb092c3b780c676b5cc57ff3337af9c83
+CPPLLAMA_VERSION?=d8b009a9456bf5284376149f3deb09300a37701a
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 7e34dfdae7298979d0202c6d82bdfae655f82582 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 20 Mar 2024 23:13:29 +0100
Subject: [PATCH 0015/2750] :arrow_up: Update ggerganov/llama.cpp (#1866)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index fe074592..653d6beb 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d8b009a9456bf5284376149f3deb09300a37701a
+CPPLLAMA_VERSION?=1c51f98adcbad40e3c41f0a6ffadeb723190b417
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From eeaf8c7ccd18768406b665797f641cc302d91f13 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 20 Mar 2024 23:26:29 +0100
Subject: [PATCH 0016/2750] :arrow_up: Update ggerganov/whisper.cpp (#1867)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 653d6beb..85d6c7c9 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=e7794a868ffb53f5299125aaaf74fbcad93cd06c
+WHISPER_CPP_VERSION?=79d5765e7e1a904d976adfd5636da7da43163eb3
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From e533dcf506398e34833c8e66e7e821ffaedbee45 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 21 Mar 2024 01:12:20 +0100
Subject: [PATCH 0017/2750] feat(functions/aio): all-in-one images, function
template enhancements (#1862)
* feat(startup): allow to specify models from local files
* feat(aio): add Dockerfile, make targets, aio profiles
* feat(template): add Function and LastMessage
* add hermes2-pro-mistral
* update hermes2 definition
* feat(template): add sprig
* feat(template): expose FunctionCall
* feat(aio): switch llm for text
---
Dockerfile.aio | 9 ++++
Makefile | 14 ++++++
aio/cpu/embeddings.yaml | 13 ++++++
aio/cpu/image-gen.yaml | 53 +++++++++++++++++++++++
aio/cpu/speech-to-text.yaml | 18 ++++++++
aio/cpu/text-to-speech.yaml | 15 +++++++
aio/cpu/text-to-text.yaml | 22 ++++++++++
aio/cpu/vision.yaml | 40 +++++++++++++++++
aio/gpu-8g/embeddings.yaml | 13 ++++++
aio/gpu-8g/image-gen.yaml | 22 ++++++++++
aio/gpu-8g/speech-to-text.yaml | 18 ++++++++
aio/gpu-8g/text-to-speech.yaml | 15 +++++++
aio/gpu-8g/text-to-text.yaml | 51 ++++++++++++++++++++++
aio/gpu-8g/vision.yaml | 40 +++++++++++++++++
core/http/endpoints/openai/chat.go | 3 ++
embedded/models/hermes-2-pro-mistral.yaml | 51 ++++++++++++++++++++++
go.mod | 9 ++++
go.sum | 34 +++++++++++++++
pkg/model/loader.go | 6 ++-
pkg/startup/model_preload.go | 18 +++++++-
20 files changed, 462 insertions(+), 2 deletions(-)
create mode 100644 Dockerfile.aio
create mode 100644 aio/cpu/embeddings.yaml
create mode 100644 aio/cpu/image-gen.yaml
create mode 100644 aio/cpu/speech-to-text.yaml
create mode 100644 aio/cpu/text-to-speech.yaml
create mode 100644 aio/cpu/text-to-text.yaml
create mode 100644 aio/cpu/vision.yaml
create mode 100644 aio/gpu-8g/embeddings.yaml
create mode 100644 aio/gpu-8g/image-gen.yaml
create mode 100644 aio/gpu-8g/speech-to-text.yaml
create mode 100644 aio/gpu-8g/text-to-speech.yaml
create mode 100644 aio/gpu-8g/text-to-text.yaml
create mode 100644 aio/gpu-8g/vision.yaml
create mode 100644 embedded/models/hermes-2-pro-mistral.yaml
diff --git a/Dockerfile.aio b/Dockerfile.aio
new file mode 100644
index 00000000..4097e6d5
--- /dev/null
+++ b/Dockerfile.aio
@@ -0,0 +1,9 @@
+ARG BASE_IMAGE=ubuntu:22.04
+
+FROM ${BASE_IMAGE}
+ARG SIZE=cpu
+ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"
+
+COPY aio/${SIZE} /aio-models
+
+ENTRYPOINT [ "/build/entrypoint.sh" ]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 85d6c7c9..c03091d0 100644
--- a/Makefile
+++ b/Makefile
@@ -535,6 +535,8 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.
grpcs: prepare $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
+DOCKER_AIO_IMAGE?=local-ai-aio
+DOCKER_AIO_SIZE?=cpu
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:22.04
@@ -545,6 +547,18 @@ docker:
--build-arg GO_TAGS=$(GO_TAGS) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .
+
+docker-aio:
+ @echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
+ @echo "Building AIO image with base image $(BASE_IMAGE)"
+ docker build \
+ --build-arg BASE_IMAGE=$(BASE_IMAGE) \
+ --build-arg SIZE=$(DOCKER_AIO_SIZE) \
+ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
+
+docker-aio-all:
+ $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
+ $(MAKE) docker-aio DOCKER_AIO_SIZE=gpu-8g
docker-image-intel:
docker build \
diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml
new file mode 100644
index 00000000..512d63a4
--- /dev/null
+++ b/aio/cpu/embeddings.yaml
@@ -0,0 +1,13 @@
+name: all-minilm-l6-v2
+backend: sentencetransformers
+embeddings: true
+parameters:
+ model: all-MiniLM-L6-v2
+
+usage: |
+ You can test this model with curl like this:
+
+ curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+ "input": "Your text string goes here",
+ "model": "all-minilm-l6-v2"
+ }'
\ No newline at end of file
diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml
new file mode 100644
index 00000000..3b9c2eec
--- /dev/null
+++ b/aio/cpu/image-gen.yaml
@@ -0,0 +1,53 @@
+name: stablediffusion
+backend: stablediffusion
+parameters:
+ model: stablediffusion_assets
+
+license: "BSD-3"
+urls:
+- https://github.com/EdVince/Stable-Diffusion-NCNN
+- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
+
+description: |
+ Stable Diffusion in NCNN with c++, supported txt2img and img2img
+
+download_files:
+- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
+ sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
+ sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
+ sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
+- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
+ sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
+- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
+ sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
+ sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
+ sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
+- filename: "stablediffusion_assets/log_sigmas.bin"
+ sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
+- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
+ sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
+ sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
+ sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
+- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
+ sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
+- filename: "stablediffusion_assets/vocab.txt"
+ sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
\ No newline at end of file
diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml
new file mode 100644
index 00000000..f7ebd217
--- /dev/null
+++ b/aio/cpu/speech-to-text.yaml
@@ -0,0 +1,18 @@
+name: whisper
+backend: whisper
+parameters:
+ model: ggml-whisper-base.bin
+
+usage: |
+ ## example audio file
+ wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+
+ ## Send the example audio file to the transcriptions endpoint
+ curl http://localhost:8080/v1/audio/transcriptions \
+ -H "Content-Type: multipart/form-data" \
+ -F file="@$PWD/gb1.ogg" -F model="whisper"
+
+download_files:
+- filename: "ggml-whisper-base.bin"
+ sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+ uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml
new file mode 100644
index 00000000..93c11403
--- /dev/null
+++ b/aio/cpu/text-to-speech.yaml
@@ -0,0 +1,15 @@
+name: voice-en-us-amy-low
+download_files:
+ - filename: voice-en-us-amy-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+
+parameters:
+ model: en-us-amy-low.onnx
+
+usage: |
+ To test if this model works as expected, you can use the following curl command:
+
+ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
+ "model":"voice-en-us-amy-low",
+ "input": "Hi, this is a test."
+ }'
\ No newline at end of file
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
new file mode 100644
index 00000000..7558ba9f
--- /dev/null
+++ b/aio/cpu/text-to-text.yaml
@@ -0,0 +1,22 @@
+name: gpt-3.5-turbo
+context_size: 2048
+f16: true
+gpu_layers: 90
+mmap: true
+trimsuffix:
+- "\n"
+parameters:
+ model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+
+template:
+ chat: &template |-
+ Instruct: {{.Input}}
+ Output:
+ completion: *template
+
+usage: |
+ To use this model, interact with the API (in another terminal) with curl for instance:
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "phi-2",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml
new file mode 100644
index 00000000..3d240681
--- /dev/null
+++ b/aio/cpu/vision.yaml
@@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+ model: bakllava.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+ uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+ uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml
new file mode 100644
index 00000000..512d63a4
--- /dev/null
+++ b/aio/gpu-8g/embeddings.yaml
@@ -0,0 +1,13 @@
+name: all-minilm-l6-v2
+backend: sentencetransformers
+embeddings: true
+parameters:
+ model: all-MiniLM-L6-v2
+
+usage: |
+ You can test this model with curl like this:
+
+ curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+ "input": "Your text string goes here",
+ "model": "all-minilm-l6-v2"
+ }'
\ No newline at end of file
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
new file mode 100644
index 00000000..3857cd6b
--- /dev/null
+++ b/aio/gpu-8g/image-gen.yaml
@@ -0,0 +1,22 @@
+name: dreamshaper
+parameters:
+ model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+backend: diffusers
+step: 25
+f16: true
+cuda: true
+diffusers:
+ pipeline_type: StableDiffusionPipeline
+ cuda: true
+ enable_parameters: "negative_prompt,num_inference_steps"
+ scheduler_type: "k_dpmpp_2m"
+
+usage: |
+ curl http://localhost:8080/v1/images/generations \
+ -H "Content-Type: application/json" \
+ -d '{
+ "prompt": "|",
+ "model": "dreamshaper",
+ "step": 25,
+ "size": "512x512"
+ }'
\ No newline at end of file
diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml
new file mode 100644
index 00000000..f7ebd217
--- /dev/null
+++ b/aio/gpu-8g/speech-to-text.yaml
@@ -0,0 +1,18 @@
+name: whisper
+backend: whisper
+parameters:
+ model: ggml-whisper-base.bin
+
+usage: |
+ ## example audio file
+ wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+
+ ## Send the example audio file to the transcriptions endpoint
+ curl http://localhost:8080/v1/audio/transcriptions \
+ -H "Content-Type: multipart/form-data" \
+ -F file="@$PWD/gb1.ogg" -F model="whisper"
+
+download_files:
+- filename: "ggml-whisper-base.bin"
+ sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+ uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml
new file mode 100644
index 00000000..93c11403
--- /dev/null
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -0,0 +1,15 @@
+name: voice-en-us-amy-low
+download_files:
+ - filename: voice-en-us-amy-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+
+parameters:
+ model: en-us-amy-low.onnx
+
+usage: |
+ To test if this model works as expected, you can use the following curl command:
+
+ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
+ "model":"voice-en-us-amy-low",
+ "input": "Hi, this is a test."
+ }'
\ No newline at end of file
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
new file mode 100644
index 00000000..d91e057c
--- /dev/null
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -0,0 +1,51 @@
+name: gpt-3.5-turbo
+mmap: true
+parameters:
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+roles:
+ assistant_function_call: assistant
+ function: tool
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+ {{ if eq .RoleName "assistant_function_call" }}{{end}}
+ {{ if eq .RoleName "function" }}{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+ {{ if eq .RoleName "assistant_function_call" }} {{end}}
+ {{ if eq .RoleName "function" }}{{end}}
+ <|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "hermes-2-pro-mistral",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml
new file mode 100644
index 00000000..3d240681
--- /dev/null
+++ b/aio/gpu-8g/vision.yaml
@@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+ model: bakllava.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+ uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+ uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 3add0972..383a2b77 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -248,7 +248,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
Role: r,
RoleName: role,
Content: i.StringContent,
+ FunctionCall: i.FunctionCall,
FunctionName: i.Name,
+ LastMessage: messageIndex == (len(input.Messages) - 1),
+ Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
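The three new fields let model templates react to function-calling state: `.FunctionCall` carries the parsed call, `.LastMessage` flags the final message, and `.Function` is true only when a grammar is set and the message is the last one. A minimal sketch of a `chat_message` template that could consume them (a hypothetical fragment, not one of this series' model files):

```yaml
# Hypothetical fragment: emit any parsed function call as JSON
# alongside the plain message content.
template:
  chat_message: |
    <|im_start|>{{.RoleName}}
    {{if .Content}}{{.Content}}{{end}}
    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
    <|im_end|>
```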
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
new file mode 100644
index 00000000..84510d2a
--- /dev/null
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -0,0 +1,51 @@
+name: hermes-2-pro-mistral
+mmap: true
+parameters:
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+roles:
+ assistant_function_call: assistant
+ function: tool
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+ {{ if eq .RoleName "assistant_function_call" }}{{end}}
+ {{ if eq .RoleName "function" }}{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+ {{ if eq .RoleName "assistant_function_call" }} {{end}}
+ {{ if eq .RoleName "function" }}{{end}}
+ <|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "hermes-2-pro-mistral",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/go.mod b/go.mod
index b218ca41..f2c53e84 100644
--- a/go.mod
+++ b/go.mod
@@ -53,6 +53,9 @@ require (
)
require (
+ github.com/Masterminds/goutils v1.1.1 // indirect
+ github.com/Masterminds/semver/v3 v3.2.0 // indirect
+ github.com/Masterminds/sprig/v3 v3.2.3 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
@@ -66,11 +69,14 @@ require (
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.2 // indirect
github.com/gorilla/css v1.0.0 // indirect
+ github.com/huandu/xstrings v1.3.3 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/microcosm-cc/bluemonday v1.0.26 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
+ github.com/mitchellh/copystructure v1.0.0 // indirect
+ github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.13.0 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
@@ -81,12 +87,15 @@ require (
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
+ github.com/shopspring/decimal v1.2.0 // indirect
+ github.com/spf13/cast v1.3.1 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/yuin/goldmark v1.5.2 // indirect
github.com/yuin/goldmark-emoji v1.0.1 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
+ golang.org/x/crypto v0.14.0 // indirect
golang.org/x/term v0.13.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
diff --git a/go.sum b/go.sum
index a3ecade2..7238ceba 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,11 @@
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
+github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
+github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
+github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
+github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
+github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
+github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
@@ -85,6 +91,7 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
@@ -95,7 +102,10 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
+github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
@@ -136,6 +146,10 @@ github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3r
github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
+github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
+github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
+github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
+github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
@@ -210,9 +224,14 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
+github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
+github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
+github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
+github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -245,6 +264,7 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMx
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU=
github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os=
@@ -266,7 +286,12 @@ go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmY
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
+golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
+golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -274,14 +299,18 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
+golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -297,6 +326,8 @@ golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -311,6 +342,7 @@ golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
@@ -318,11 +350,13 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index bea32fb7..c2c9df0e 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -10,6 +10,7 @@ import (
"sync"
"text/template"
+ "github.com/Masterminds/sprig/v3"
grammar "github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/grpc"
process "github.com/mudler/go-processmanager"
@@ -36,6 +37,9 @@ type ChatMessageTemplateData struct {
FunctionName string
Content string
MessageIndex int
+ Function bool
+ FunctionCall interface{}
+ LastMessage bool
}
// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
@@ -261,7 +265,7 @@ func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateN
}
// Parse the template
- tmpl, err := template.New("prompt").Parse(dat)
+ tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
if err != nil {
return err
}
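With sprig's FuncMap registered, every prompt template gains the usual sprig helpers (`trim`, `upper`, `toJson`, and so on). A self-contained sketch of the same wiring outside LocalAI:

```go
package main

import (
	"os"
	"text/template"

	"github.com/Masterminds/sprig/v3"
)

func main() {
	// sprig adds helpers such as trim and upper to text/template;
	// this mirrors how the model loader now parses prompt templates.
	tmpl := template.Must(template.New("prompt").
		Funcs(sprig.FuncMap()).
		Parse("Instruct: {{ .Input | trim | upper }}\nOutput:\n"))
	_ = tmpl.Execute(os.Stdout, map[string]string{"Input": "  how are you?  "})
}
```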
diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go
index cc514334..979b4d83 100644
--- a/pkg/startup/model_preload.go
+++ b/pkg/startup/model_preload.go
@@ -60,7 +60,23 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
}
}
default:
- log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+ if _, err := os.Stat(url); err == nil {
+ log.Debug().Msgf("[startup] resolved local model: %s", url)
+ // copy to modelPath
+ md5Name := utils.MD5(url)
+
+ modelYAML, err := os.ReadFile(url)
+ if err != nil {
+ log.Error().Msgf("error loading model: %s", err.Error())
+ continue
+ }
+
+ if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
+ log.Error().Msgf("error loading model: %s", err.Error())
+ }
+ } else {
+ log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+ }
}
}
}
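The copied file is stored under an MD5-derived name, so preloading the same local path twice overwrites rather than duplicates. A sketch of the naming scheme, using the standard library in place of the utils.MD5 helper (the path below is made up):

```go
package main

import (
	"crypto/md5"
	"fmt"
)

func main() {
	// A local model YAML passed at startup ends up as
	// <modelPath>/<md5(path)>.yaml.
	url := "/home/user/models/custom.yaml"
	md5Name := fmt.Sprintf("%x", md5.Sum([]byte(url)))
	fmt.Println(md5Name + ".yaml")
}
```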
From 3cf64d1e7e835224da0ad5a3df5dcf8f675722f4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 21 Mar 2024 08:57:41 +0100
Subject: [PATCH 0018/2750] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 082da33e..c58428f7 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+- All-in-one image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
- Tools API support: https://github.com/mudler/LocalAI/pull/1715
From 743095b7d82b20998b4dde1e557292c7fcc6bd82 Mon Sep 17 00:00:00 2001
From: Sebastian
Date: Thu, 21 Mar 2024 22:08:33 +0100
Subject: [PATCH 0019/2750] docs(mac): improve documentation for mac build
(#1873)
* docs(mac): Improve documentation for mac build
- added documentation to build from current master
- added troubleshooting information
Signed-off-by: Sebastian
* docs(mac): fix typo
Signed-off-by: Sebastian
---------
Signed-off-by: Sebastian
---
docs/content/docs/getting-started/build.md | 27 +++++++++++++++++-----
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index b26a16d7..238bdbec 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -45,6 +45,8 @@ To install the dependencies follow the instructions below:
{{< tabs tabTotal="3" >}}
{{% tab tabName="Apple" %}}
+Install `xcode` from the App Store
+
```bash
brew install abseil cmake go grpc protobuf wget
```
@@ -111,10 +113,12 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS
### Example: Build on mac
-Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
+Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`.
The below has been tested by one mac user and found to work. Note that this doesn't use Docker to run the server:
+Install `xcode` from the App Store (needed for MetalKit)
+
```
# install build dependencies
brew install abseil cmake go grpc protobuf wget
@@ -146,8 +150,20 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
}'
```
-### Build with Image generation support
+#### Troubleshooting on Mac
+If you encounter errors about a missing `metal` utility, install `Xcode` from the App Store.
+If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers of the model in use (or simply use a high number such as 256).
+If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
+
+```
+# reinstall build dependencies
+brew reinstall abseil cmake go grpc protobuf wget
+
+make clean
+
+make build
+```
**Requirements**: OpenCV, Gomp
@@ -239,13 +255,12 @@ make BUILD_TYPE=sycl_f32 build # for float32
#### Metal (Apple Silicon)
```
-make BUILD_TYPE=metal build
+make build
-# Set `gpu_layers: 1` to your YAML model config file and `f16: true`
-# Note: only models quantized with q4_0 are supported!
+# correct build type is automatically used on mac (BUILD_TYPE=metal)
+# Set `gpu_layers: 256` (or equal to the number of model layers) to your YAML model config file and `f16: true`
```
-
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
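Per the Metal notes above, a model config for Apple Silicon would carry the two settings called out in the build comments; a hypothetical fragment:

```yaml
# hypothetical model config fragment for Apple Silicon (Metal)
f16: true
gpu_layers: 256   # or set it to the exact number of layers of the model in use
```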
From abc9360dc62863af1c484f914cf2b0948169fb02 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 21 Mar 2024 22:09:04 +0100
Subject: [PATCH 0020/2750] feat(aio): entrypoint, update workflows (#1872)
---
.github/workflows/image.yml | 5 ++
.github/workflows/image_build.yml | 68 ++++++++++++++++++++-
Dockerfile.aio | 7 +--
Makefile | 5 +-
aio/cpu/README.md | 5 ++
aio/cpu/embeddings.yaml | 13 ++--
aio/entrypoint.sh | 98 +++++++++++++++++++++++++++++++
aio/gpu-8g/embeddings.yaml | 4 +-
aio/gpu-8g/image-gen.yaml | 2 +-
9 files changed, 191 insertions(+), 16 deletions(-)
create mode 100644 aio/cpu/README.md
create mode 100755 aio/entrypoint.sh
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 5ba0f1bf..8e2bbbdd 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -26,6 +26,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ aio: ${{ matrix.aio }}
makeflags: "-j3"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -86,6 +87,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ aio: "-aio-gpu-nvidia-cuda-11"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -96,6 +98,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ aio: "-aio-gpu-nvidia-cuda-12"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
@@ -199,6 +202,7 @@ jobs:
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
+ aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
makeflags: "-j3"
secrets:
@@ -217,6 +221,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
+ aio: "-aio-cpu"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index a978f1bf..22f72131 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -51,6 +51,11 @@ on:
required: false
default: ''
type: string
+ aio:
+ description: 'AIO Image Name'
+ required: false
+ default: ''
+ type: string
secrets:
dockerUsername:
required: true
@@ -129,7 +134,30 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
-
+ - name: Docker meta AIO (quay.io)
+ if: inputs.aio != ''
+ id: meta_aio
+ uses: docker/metadata-action@v5
+ with:
+ images: |
+ quay.io/go-skynet/local-ai
+ tags: |
+ type=ref,event=branch
+ type=semver,pattern={{raw}}
+ flavor: |
+ suffix=${{ inputs.aio }}
+ - name: Docker meta AIO (dockerhub)
+ if: inputs.aio != ''
+ id: meta_aio_dockerhub
+ uses: docker/metadata-action@v5
+ with:
+ images: |
+ localai/localai
+ tags: |
+ type=ref,event=branch
+ type=semver,pattern={{raw}}
+ flavor: |
+ suffix=${{ inputs.aio }}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
@@ -172,6 +200,44 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
+ -
+ name: Inspect image
+ if: github.event_name != 'pull_request'
+ run: |
+ docker pull localai/localai:${{ steps.meta.outputs.version }}
+ docker image inspect localai/localai:${{ steps.meta.outputs.version }}
+ docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+ docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+ - name: Build and push AIO image
+ if: inputs.aio != ''
+ uses: docker/build-push-action@v5
+ with:
+ builder: ${{ steps.buildx.outputs.name }}
+ build-args: |
+ BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+ context: .
+ file: ./Dockerfile.aio
+ platforms: ${{ inputs.platforms }}
+ push: ${{ github.event_name != 'pull_request' }}
+ tags: ${{ steps.meta_aio.outputs.tags }}
+ labels: ${{ steps.meta_aio.outputs.labels }}
+ - name: Build and push AIO image (dockerhub)
+ if: inputs.aio != ''
+ uses: docker/build-push-action@v5
+ with:
+ builder: ${{ steps.buildx.outputs.name }}
+ build-args: |
+ BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
+ context: .
+ file: ./Dockerfile.aio
+ platforms: ${{ inputs.platforms }}
+ push: ${{ github.event_name != 'pull_request' }}
+ tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
+ labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
+ - name: job summary(AIO)
+ if: inputs.aio != ''
+ run: |
+ echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
\ No newline at end of file
diff --git a/Dockerfile.aio b/Dockerfile.aio
index 4097e6d5..81063bb4 100644
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,9 +1,8 @@
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE}
-ARG SIZE=cpu
-ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"
-COPY aio/${SIZE} /aio-models
+RUN apt-get update && apt-get install -y pciutils && apt-get clean
-ENTRYPOINT [ "/build/entrypoint.sh" ]
\ No newline at end of file
+COPY aio/ /aio
+ENTRYPOINT [ "/aio/entrypoint.sh" ]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index c03091d0..96347307 100644
--- a/Makefile
+++ b/Makefile
@@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
-DOCKER_AIO_SIZE?=cpu
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:22.04
@@ -549,11 +548,9 @@ docker:
-t $(DOCKER_IMAGE) .
docker-aio:
- @echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
- @echo "Building AIO image with base image $(BASE_IMAGE)"
+ @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
- --build-arg SIZE=$(DOCKER_AIO_SIZE) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
diff --git a/aio/cpu/README.md b/aio/cpu/README.md
new file mode 100644
index 00000000..8b0b1086
--- /dev/null
+++ b/aio/cpu/README.md
@@ -0,0 +1,5 @@
+## AIO CPU size
+
+Use this image for CPU-only setups.
+
+Please use only C++ backends so the base image stays as small as possible (without CUDA, cuDNN, Python, etc.).
\ No newline at end of file
diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml
index 512d63a4..bdee079c 100644
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,13 +1,18 @@
-name: all-minilm-l6-v2
-backend: sentencetransformers
+backend: bert-embeddings
embeddings: true
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: text-embedding-ada-002
+
parameters:
- model: all-MiniLM-L6-v2
+ model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
- "model": "all-minilm-l6-v2"
+ "model": "text-embedding-ada-002"
}'
\ No newline at end of file
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
new file mode 100755
index 00000000..8c15a5e4
--- /dev/null
+++ b/aio/entrypoint.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+echo "===> LocalAI All-in-One (AIO) container starting..."
+
+GPU_ACCELERATION=false
+GPU_VENDOR=""
+
+function detect_gpu() {
+ case "$(uname -s)" in
+ Linux)
+ if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
+ echo "NVIDIA GPU detected"
+ # nvidia-smi should be installed in the container
+ if nvidia-smi; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=nvidia
+ else
+ echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
+ fi
+ elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
+ echo "AMD GPU detected"
+ # Check if ROCm is installed
+ if [ -d /opt/rocm ]; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=amd
+ else
+ echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+ fi
+ elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
+ echo "Intel GPU detected"
+ if [ -d /opt/intel ]; then
+ GPU_ACCELERATION=true
+ else
+ echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+ fi
+ fi
+ ;;
+ Darwin)
+ if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
+ echo "Apple Metal supported GPU detected"
+ GPU_ACCELERATION=true
+ GPU_VENDOR=apple
+ fi
+ ;;
+ esac
+}
+
+function detect_gpu_size() {
+ if [ "$GPU_ACCELERATION" = true ]; then
+ GPU_SIZE=gpu-8g
+ fi
+
+ # Attempting to find GPU memory size for NVIDIA GPUs
+ if echo "$gpu_model" | grep -iq nvidia; then
+ echo "NVIDIA GPU detected. Attempting to find memory size..."
+ nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
+ if [ ! -z "$nvidia_sm" ]; then
+ echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
+ else
+ echo "Unable to determine NVIDIA GPU memory size."
+ fi
+ # if bigger than 8GB, use 16GB
+ #if [ "$nvidia_sm" -gt 8192 ]; then
+ # GPU_SIZE=gpu-16g
+ #fi
+ else
+ echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
+ fi
+
+ # default to cpu if GPU_SIZE is not set
+ if [ -z "$GPU_SIZE" ]; then
+ GPU_SIZE=cpu
+ fi
+}
+
+function check_vars() {
+ if [ -z "$MODELS" ]; then
+ echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
+ exit 1
+ fi
+
+ if [ -z "$SIZE" ]; then
+ echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
+ exit 1
+ fi
+}
+
+detect_gpu
+detect_gpu_size
+
+SIZE=${SIZE:-$GPU_SIZE} # fall back to the detected size (cpu when no GPU was found)
+MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}
+
+check_vars
+
+echo "Starting LocalAI with the following models: $MODELS"
+
+/build/entrypoint.sh "$@"
\ No newline at end of file
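SIZE is auto-detected by the script, and both SIZE and MODELS can still be forced from the environment. A usage sketch, assuming an AIO image tag like the `-aio-*` suffixes wired into the image workflow above:

```bash
# Force the CPU profile and load a single model definition.
docker run -p 8080:8080 \
  -e SIZE=cpu \
  -e MODELS=/aio/cpu/text-to-text.yaml \
  localai/localai:master-aio-cpu   # hypothetical tag; see the workflow suffixes above
```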
diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml
index 512d63a4..98b519d5 100644
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,4 +1,4 @@
-name: all-minilm-l6-v2
+name: text-embedding-ada-002
backend: sentencetransformers
embeddings: true
parameters:
@@ -9,5 +9,5 @@ usage: |
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
- "model": "all-minilm-l6-v2"
+ "model": "text-embedding-ada-002"
}'
\ No newline at end of file
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
index 3857cd6b..74cefc1d 100644
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -4,7 +4,7 @@ parameters:
backend: diffusers
step: 25
f16: true
-cuda: true
+
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
From 418ba020259fe954333ae810bfaa13dc813b0bb1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 22 Mar 2024 09:14:17 +0100
Subject: [PATCH 0021/2750] ci: fix typo
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/image_build.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 22f72131..8f1386c6 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -227,7 +227,7 @@ jobs:
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
- BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
+ BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
@@ -240,4 +240,4 @@ jobs:
- name: job summary(AIO)
if: inputs.aio != ''
run: |
- echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
\ No newline at end of file
+ echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
From 07468c8786e8e4ad16d275b48367880e2326df16 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 22 Mar 2024 09:14:42 +0100
Subject: [PATCH 0022/2750] :arrow_up: Update ggerganov/llama.cpp (#1874)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 96347307..1509ed93 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1c51f98adcbad40e3c41f0a6ffadeb723190b417
+CPPLLAMA_VERSION?=d0a71233fbf8ade8ef06ad8e6b81d1d7b254895f
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From dd84c29a3dbb367e9e98d39374fc1263ce11cbff Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 22 Mar 2024 09:14:56 +0100
Subject: [PATCH 0023/2750] :arrow_up: Update ggerganov/whisper.cpp (#1875)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 1509ed93..8aedf897 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=79d5765e7e1a904d976adfd5636da7da43163eb3
+WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 600152df2344d8ae04d3782534c5312ee535f43c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 22 Mar 2024 20:55:11 +0100
Subject: [PATCH 0024/2750] fix(config): pass by config options, respect
defaults (#1878)
This bug had the unpleasant effect of ignoring defaults passed via the
CLI. For instance, threads could be changed only via the model config
file.
---
core/config/application_config.go | 15 +++++++++++++++
core/config/backend_config.go | 25 ++++++++++++++-----------
core/startup/startup.go | 6 ++++--
3 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/core/config/application_config.go b/core/config/application_config.go
index f25b4348..03242c3c 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -258,6 +258,21 @@ func WithApiKeys(apiKeys []string) AppOption {
}
}
+// ToConfigLoaderOptions returns a slice of ConfigLoaderOption.
+// Some options defined at the application level are passed down as
+// defaults for all model configurations.
+// This includes, for instance, the context size or the number of threads.
+// If a model doesn't set these options directly in its config file,
+// it will use the defaults defined here.
+func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
+ return []ConfigLoaderOption{
+ LoadOptionContextSize(o.ContextSize),
+ LoadOptionDebug(o.Debug),
+ LoadOptionF16(o.F16),
+ LoadOptionThreads(o.Threads),
+ }
+}
+
// func WithMetrics(meter *metrics.Metrics) AppOption {
// return func(o *StartupOptions) {
// o.Metrics = meter
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index daaf0257..32e10a17 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -188,7 +188,14 @@ func (c *BackendConfig) FunctionToCall() string {
return c.functionCallNameString
}
-func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) {
+func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
+ lo := &LoadOptions{}
+ lo.Apply(opts...)
+
+ ctx := lo.ctxSize
+ threads := lo.threads
+ f16 := lo.f16
+ debug := lo.debug
defaultTopP := 0.7
defaultTopK := 80
defaultTemp := 0.9
@@ -333,9 +340,6 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
// Load a config file for a model
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
- lo := &LoadOptions{}
- lo.Apply(opts...)
-
// Load a config file if present after the model name
cfg := &BackendConfig{
PredictionOptions: schema.PredictionOptions{
@@ -350,7 +354,9 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
// Try loading a model config file
modelConfig := filepath.Join(modelPath, modelName+".yaml")
if _, err := os.Stat(modelConfig); err == nil {
- if err := cl.LoadBackendConfig(modelConfig); err != nil {
+ if err := cl.LoadBackendConfig(
+ modelConfig, opts...,
+ ); err != nil {
return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
cfgExisting, exists = cl.GetBackendConfig(modelName)
@@ -360,7 +366,7 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
}
}
- cfg.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)
+ cfg.SetDefaults(opts...)
return cfg, nil
}
@@ -371,9 +377,6 @@ func NewBackendConfigLoader() *BackendConfigLoader {
}
}
func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
- lo := &LoadOptions{}
- lo.Apply(opts...)
-
c := &[]*BackendConfig{}
f, err := os.ReadFile(file)
if err != nil {
@@ -384,7 +387,7 @@ func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendC
}
for _, cc := range *c {
- cc.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)
+ cc.SetDefaults(opts...)
}
return *c, nil
@@ -403,7 +406,7 @@ func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig,
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
- c.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)
+ c.SetDefaults(opts...)
return c, nil
}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 43e6646d..828eb7a7 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -58,12 +58,14 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
cl := config.NewBackendConfigLoader()
ml := model.NewModelLoader(options.ModelPath)
- if err := cl.LoadBackendConfigsFromPath(options.ModelPath); err != nil {
+ configLoaderOpts := options.ToConfigLoaderOptions()
+
+ if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if options.ConfigFile != "" {
- if err := cl.LoadBackendConfigFile(options.ConfigFile); err != nil {
+ if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
From 3bec467a91071133f8f74e7ce04d997733ed51b9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 22 Mar 2024 21:12:48 +0100
Subject: [PATCH 0025/2750] feat(models): add phi-2-chat, llava-1.6, bakllava,
cerbero (#1879)
---
embedded/models/bakllava.yaml | 40 ++++++++++++++++++++++++++
embedded/models/cerbero.yaml | 24 ++++++++++++++++
embedded/models/llava-1.5.yaml | 33 +++++++++++++++++++++
embedded/models/llava-1.6-mistral.yaml | 33 +++++++++++++++++++++
embedded/models/llava-1.6-vicuna.yaml | 37 ++++++++++++++++++++++++
embedded/models/phi-2-chat.yaml | 25 ++++++++++++++++
6 files changed, 192 insertions(+)
create mode 100644 embedded/models/bakllava.yaml
create mode 100644 embedded/models/cerbero.yaml
create mode 100644 embedded/models/llava-1.5.yaml
create mode 100644 embedded/models/llava-1.6-mistral.yaml
create mode 100644 embedded/models/llava-1.6-vicuna.yaml
create mode 100644 embedded/models/phi-2-chat.yaml
diff --git a/embedded/models/bakllava.yaml b/embedded/models/bakllava.yaml
new file mode 100644
index 00000000..52fd9466
--- /dev/null
+++ b/embedded/models/bakllava.yaml
@@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: bakllava
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+ model: bakllava.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+ uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+ uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "bakllava",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/embedded/models/cerbero.yaml b/embedded/models/cerbero.yaml
new file mode 100644
index 00000000..8ace4e35
--- /dev/null
+++ b/embedded/models/cerbero.yaml
@@ -0,0 +1,24 @@
+backend: llama
+context_size: 8192
+f16: false
+gpu_layers: 90
+name: cerbero
+mmap: false
+parameters:
+ model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+template:
+ completion: "{{.Input}}"
+ chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
+roles:
+ user: "[|Umano|] "
+ system: "[|Umano|] "
+ assistant: "[|Assistente|] "
+
+stopwords:
+- "[|Umano|]"
+
+trimsuffix:
+- "\n"
\ No newline at end of file
diff --git a/embedded/models/llava-1.5.yaml b/embedded/models/llava-1.5.yaml
new file mode 100644
index 00000000..3db48524
--- /dev/null
+++ b/embedded/models/llava-1.5.yaml
@@ -0,0 +1,33 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava-1.5
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
+parameters:
+ model: llava-v1.5-7b-Q4_K.gguf
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: llava-v1.5-7b-Q4_K.gguf
+ uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
+- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
+ uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava-1.5",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/embedded/models/llava-1.6-mistral.yaml b/embedded/models/llava-1.6-mistral.yaml
new file mode 100644
index 00000000..602ceb62
--- /dev/null
+++ b/embedded/models/llava-1.6-mistral.yaml
@@ -0,0 +1,33 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava-1.6-mistral
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: llava-v1.6-7b-mmproj-f16.gguf
+parameters:
+ model: llava-v1.6-mistral-7b.gguf
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: llava-v1.6-mistral-7b.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
+- filename: llava-v1.6-7b-mmproj-f16.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava-1.6-mistral",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/embedded/models/llava-1.6-vicuna.yaml b/embedded/models/llava-1.6-vicuna.yaml
new file mode 100644
index 00000000..cea33e7f
--- /dev/null
+++ b/embedded/models/llava-1.6-vicuna.yaml
@@ -0,0 +1,37 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava-1.6-vicuna
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: mmproj-vicuna7b-f16.gguf
+parameters:
+ model: vicuna-7b-q5_k.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ seed: -1
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: vicuna-7b-q5_k.gguf
+ uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
+- filename: mmproj-vicuna7b-f16.gguf
+ uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava-1.6-vicuna",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/embedded/models/phi-2-chat.yaml b/embedded/models/phi-2-chat.yaml
new file mode 100644
index 00000000..4a3ca7aa
--- /dev/null
+++ b/embedded/models/phi-2-chat.yaml
@@ -0,0 +1,25 @@
+name: phi-2-chat
+mmap: true
+parameters:
+ model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ <|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+-
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "phi-2-chat",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
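The chat_message entry above is a Go text/template. A self-contained sketch of how it renders a single user message into ChatML; the msg struct is a stand-in for whatever object LocalAI actually feeds the template, modelling only the two fields it references:

    package main

    import (
        "os"
        "text/template"
    )

    // msg stands in for the template's data object; only RoleName and
    // Content are referenced by the chat_message template.
    type msg struct {
        RoleName string
        Content  string
    }

    const chatMessage = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
    {{if .Content}}{{.Content}}{{end}}
    <|im_end|>`

    func main() {
        t := template.Must(template.New("chat_message").Parse(chatMessage))
        // Prints:
        // <|im_start|>user
        // How are you doing?
        // <|im_end|>
        if err := t.Execute(os.Stdout, msg{RoleName: "user", Content: "How are you doing?"}); err != nil {
            panic(err)
        }
    }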
From 4b1ee0c1701a125125a78ecde892ec7b5e903fbd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 22 Mar 2024 21:13:11 +0100
Subject: [PATCH 0026/2750] feat(aio): add tests, update model definitions
(#1880)
---
.github/workflows/test.yml | 52 +++++++++++
Makefile | 4 +
aio/cpu/speech-to-text.yaml | 4 +-
aio/cpu/text-to-speech.yaml | 2 +-
aio/cpu/text-to-text.yaml | 31 ++++---
aio/cpu/vision.yaml | 4 +-
aio/entrypoint.sh | 4 +-
aio/gpu-8g/image-gen.yaml | 2 +-
aio/gpu-8g/speech-to-text.yaml | 4 +-
aio/gpu-8g/text-to-speech.yaml | 4 +-
aio/gpu-8g/text-to-text.yaml | 4 +-
aio/gpu-8g/vision.yaml | 19 ++--
go.mod | 25 +++++-
go.sum | 93 +++++++++++++++++++
tests/e2e-aio/e2e_suite_test.go | 97 ++++++++++++++++++++
tests/e2e-aio/e2e_test.go | 152 ++++++++++++++++++++++++++++++++
16 files changed, 461 insertions(+), 40 deletions(-)
create mode 100644 tests/e2e-aio/e2e_suite_test.go
create mode 100644 tests/e2e-aio/e2e_test.go
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8222508a..6d837821 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -110,6 +110,58 @@ jobs:
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
+ tests-aio-container:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Release space from worker
+ run: |
+ echo "Listing top largest packages"
+ pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+ head -n 30 <<< "${pkgs}"
+ echo
+ df -h
+ echo
+ sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+ sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+ sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+ sudo rm -rf /usr/local/lib/android
+ sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+ sudo rm -rf /usr/share/dotnet
+ sudo apt-get remove -y '^mono-.*' || true
+ sudo apt-get remove -y '^ghc-.*' || true
+ sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+ sudo apt-get remove -y 'php.*' || true
+ sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+ sudo apt-get remove -y '^google-.*' || true
+ sudo apt-get remove -y azure-cli || true
+ sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+ sudo apt-get remove -y '^gfortran-.*' || true
+ sudo apt-get autoremove -y
+ sudo apt-get clean
+ echo
+ echo "Listing top largest packages"
+ pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+ head -n 30 <<< "${pkgs}"
+ echo
+ sudo rm -rfv build || true
+ df -h
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Build images
+ run: |
+ docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
+ BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
+ - name: Test
+ run: |
+ LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+ make run-e2e-aio
+ - name: Setup tmate session if tests fail
+ if: ${{ failure() }}
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
+
tests-apple:
runs-on: macOS-14
strategy:
diff --git a/Makefile b/Makefile
index 8aedf897..95af1936 100644
--- a/Makefile
+++ b/Makefile
@@ -353,6 +353,10 @@ run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
+run-e2e-aio:
+ @echo 'Running e2e AIO tests'
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
+
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml
index f7ebd217..77850d79 100644
--- a/aio/cpu/speech-to-text.yaml
+++ b/aio/cpu/speech-to-text.yaml
@@ -1,4 +1,4 @@
-name: whisper
+name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
@@ -10,7 +10,7 @@ usage: |
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
- -F file="@$PWD/gb1.ogg" -F model="whisper"
+ -F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml
index 93c11403..91998e6a 100644
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -1,4 +1,4 @@
-name: voice-en-us-amy-low
+name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 7558ba9f..4fd88500 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,22 +1,25 @@
-name: gpt-3.5-turbo
-context_size: 2048
-f16: true
-gpu_layers: 90
+name: gpt-4
mmap: true
-trimsuffix:
-- "\n"
parameters:
- model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+ model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
- chat: &template |-
- Instruct: {{.Input}}
- Output:
- completion: *template
-
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ <|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 2048
+f16: true
+stopwords:
+- <|im_end|>
+-
usage: |
- To use this model, interact with the API (in another terminal) with curl for instance:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "phi-2",
+ "model": "phi-2-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml
index 3d240681..0777f715 100644
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -4,7 +4,7 @@ f16: true
gpu_layers: 90
mmap: true
-name: llava
+name: gpt-4-vision-preview
roles:
user: "USER:"
@@ -36,5 +36,5 @@ download_files:
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "llava",
+ "model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index 8c15a5e4..b2f64f63 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -88,8 +88,8 @@ function check_vars() {
detect_gpu
detect_gpu_size
-SIZE=${SIZE:-$GPU_SIZE} # default to cpu
-MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}
+SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
+export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"
check_vars
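Two effects of this two-line hunk: quoting the default expansions guards against word-splitting, and exporting MODELS makes the value visible to the local-ai process the entrypoint subsequently starts, instead of leaving it a shell-local variable.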
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
index 74cefc1d..9868572f 100644
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -1,4 +1,4 @@
-name: dreamshaper
+name: stablediffusion
parameters:
model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
backend: diffusers
diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml
index f7ebd217..77850d79 100644
--- a/aio/gpu-8g/speech-to-text.yaml
+++ b/aio/gpu-8g/speech-to-text.yaml
@@ -1,4 +1,4 @@
-name: whisper
+name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
@@ -10,7 +10,7 @@ usage: |
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
- -F file="@$PWD/gb1.ogg" -F model="whisper"
+ -F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml
index 93c11403..8d875a29 100644
--- a/aio/gpu-8g/text-to-speech.yaml
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -1,4 +1,4 @@
-name: voice-en-us-amy-low
+name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
@@ -10,6 +10,6 @@ usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
- "model":"voice-en-us-amy-low",
+ "model":"tts-1",
"input": "Hi, this is a test."
}'
\ No newline at end of file
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index d91e057c..c6f26c07 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,4 +1,4 @@
-name: gpt-3.5-turbo
+name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
@@ -46,6 +46,6 @@ stopwords:
-
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "hermes-2-pro-mistral",
+ "model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml
index 3d240681..02542503 100644
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -4,23 +4,20 @@ f16: true
gpu_layers: 90
mmap: true
-name: llava
+name: gpt-4-vision-preview
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
-mmproj: bakllava-mmproj.gguf
+mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
- model: bakllava.gguf
+ model: llava-v1.6-mistral-7b.Q5_K_M.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
template:
chat: |
@@ -29,12 +26,12 @@ template:
ASSISTANT:
download_files:
-- filename: bakllava.gguf
- uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
-- filename: bakllava-mmproj.gguf
- uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
+- filename: llava-v1.6-7b-mmproj-f16.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "llava",
+ "model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/go.mod b/go.mod
index f2c53e84..8a43df1d 100644
--- a/go.mod
+++ b/go.mod
@@ -25,7 +25,7 @@ require (
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
github.com/rs/zerolog v1.31.0
- github.com/sashabaranov/go-openai v1.16.0
+ github.com/sashabaranov/go-openai v1.20.4
github.com/schollz/progressbar/v3 v3.13.1
github.com/stretchr/testify v1.8.4
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
@@ -53,21 +53,32 @@ require (
)
require (
+ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Masterminds/sprig/v3 v3.2.3 // indirect
+ github.com/Microsoft/go-winio v0.6.0 // indirect
+ github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
+ github.com/cenkalti/backoff/v4 v4.1.3 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/charmbracelet/glamour v0.6.0 // indirect
+ github.com/containerd/continuity v0.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
+ github.com/docker/cli v20.10.17+incompatible // indirect
+ github.com/docker/docker v20.10.7+incompatible // indirect
+ github.com/docker/go-connections v0.4.0 // indirect
+ github.com/docker/go-units v0.4.0 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.2 // indirect
+ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
@@ -76,26 +87,38 @@ require (
github.com/microcosm-cc/bluemonday v1.0.26 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/mitchellh/copystructure v1.0.0 // indirect
+ github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mitchellh/reflectwalk v1.0.0 // indirect
+ github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.13.0 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
+ github.com/opencontainers/go-digest v1.0.0 // indirect
+ github.com/opencontainers/image-spec v1.0.2 // indirect
+ github.com/opencontainers/runc v1.1.5 // indirect
+ github.com/ory/dockertest/v3 v3.10.0 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
+ github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
+ github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/cast v1.3.1 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
+ github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
+ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
+ github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/yuin/goldmark v1.5.2 // indirect
github.com/yuin/goldmark-emoji v1.0.1 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
golang.org/x/crypto v0.14.0 // indirect
+ golang.org/x/mod v0.12.0 // indirect
golang.org/x/term v0.13.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
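The go-openai bump from v1.16.0 to v1.20.4 supplies the MultiContent vision messages and the CreateSpeech call that the new e2e suite below relies on, neither of which is available in v1.16.0; the docker, dockertest and opencontainers entries are the transitive footprint of ory/dockertest/v3, which the suite uses to drive the AIO container.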
diff --git a/go.sum b/go.sum
index 7238ceba..bef84d57 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,6 @@
+github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
+github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
@@ -6,6 +9,10 @@ github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7Y
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
+github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
+github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
+github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
+github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
@@ -17,27 +24,47 @@ github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuP
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4=
+github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc=
github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc=
+github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
+github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
+github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg=
+github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM=
+github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0=
github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M=
+github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
+github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ=
+github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
+github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
+github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
+github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4=
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
@@ -66,8 +93,11 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
@@ -84,6 +114,7 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -91,6 +122,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -109,6 +142,8 @@ github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
@@ -116,8 +151,11 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
@@ -148,8 +186,14 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
+github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
+github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
+github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
+github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
+github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
@@ -180,6 +224,16 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y
github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA=
github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
+github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
+github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs=
+github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
+github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
+github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
+github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
+github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8=
@@ -188,6 +242,8 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkoukk/tiktoken-go v0.1.2 h1:u7PCSBiWJ3nJYoTGShyM9iHXz4dNyYkurwwp+GHtyHY=
github.com/pkoukk/tiktoken-go v0.1.2/go.mod h1:boMWvk9pQCOTx11pgu0DrIdrAKgQzzJKUP6vLXaz7Rw=
@@ -211,12 +267,16 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw=
github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
+github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ=
+github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4=
github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E=
github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA=
@@ -226,8 +286,12 @@ github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
+github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -240,6 +304,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
@@ -251,6 +316,7 @@ github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
@@ -259,10 +325,19 @@ github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e
github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
+github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
+github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
+github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
+github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
+github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
+github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
+github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
+github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU=
@@ -290,15 +365,19 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
@@ -309,23 +388,32 @@ golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -354,8 +442,11 @@ golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
@@ -375,6 +466,7 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -393,3 +485,4 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go
new file mode 100644
index 00000000..00fc6d2a
--- /dev/null
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -0,0 +1,97 @@
+package e2e_test
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "runtime"
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ "github.com/ory/dockertest/v3"
+ "github.com/ory/dockertest/v3/docker"
+ "github.com/sashabaranov/go-openai"
+)
+
+var pool *dockertest.Pool
+var resource *dockertest.Resource
+var client *openai.Client
+
+var containerImage = os.Getenv("LOCALAI_IMAGE")
+var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
+var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
+var apiPort = os.Getenv("LOCALAI_API_PORT")
+
+func TestLocalAI(t *testing.T) {
+ RegisterFailHandler(Fail)
+ RunSpecs(t, "LocalAI E2E test suite")
+}
+
+var _ = BeforeSuite(func() {
+
+ if containerImage == "" {
+ Fail("LOCALAI_IMAGE is not set")
+ }
+ if containerImageTag == "" {
+ Fail("LOCALAI_IMAGE_TAG is not set")
+ }
+ if apiPort == "" {
+ apiPort = "8080"
+ }
+
+ p, err := dockertest.NewPool("")
+ Expect(err).To(Not(HaveOccurred()))
+ Expect(p.Client.Ping()).To(Succeed())
+
+ pool = p
+
+ // get cwd
+ cwd, err := os.Getwd()
+ Expect(err).To(Not(HaveOccurred()))
+ md := cwd + "/models"
+
+ if modelsDir != "" {
+ md = modelsDir
+ }
+
+ proc := runtime.NumCPU()
+ options := &dockertest.RunOptions{
+ Repository: containerImage,
+ Tag: containerImageTag,
+ // Cmd: []string{"server", "/data"},
+ PortBindings: map[docker.Port][]docker.PortBinding{
+ "8080/tcp": []docker.PortBinding{{HostPort: apiPort}},
+ },
+ Env: []string{"MODELS_PATH=/models", "DEBUG=true", "THREADS=" + fmt.Sprint(proc)},
+ Mounts: []string{md + ":/models"},
+ }
+
+ r, err := pool.RunWithOptions(options)
+ Expect(err).To(Not(HaveOccurred()))
+
+ resource = r
+
+ defaultConfig := openai.DefaultConfig("")
+ defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
+
+ // Wait for API to be ready
+ client = openai.NewClientWithConfig(defaultConfig)
+
+ Eventually(func() error {
+ _, err := client.ListModels(context.TODO())
+ return err
+ }, "20m").ShouldNot(HaveOccurred())
+})
+
+var _ = AfterSuite(func() {
+ Expect(pool.Purge(resource)).To(Succeed())
+ //dat, err := os.ReadFile(resource.Container.LogPath)
+ //Expect(err).To(Not(HaveOccurred()))
+ //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
+ //fmt.Println(string(dat))
+})
+
+var _ = AfterEach(func() {
+ //Expect(dbClient.Clear()).To(Succeed())
+})
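One hardening worth considering when adapting this suite: if the run is interrupted before AfterSuite fires, the container leaks. dockertest can cap a container's lifetime; a minimal sketch against the resource variable above (Expire takes seconds and is part of the dockertest v3 API):

    // Right after RunWithOptions succeeds: have Docker remove the
    // container on its own after 30 minutes, which comfortably covers
    // the 20-minute readiness wait above, so an interrupted run does
    // not leak it.
    Expect(resource.Expire(1800)).To(Succeed())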
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
new file mode 100644
index 00000000..03d9fda9
--- /dev/null
+++ b/tests/e2e-aio/e2e_test.go
@@ -0,0 +1,152 @@
+package e2e_test
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+
+ "github.com/sashabaranov/go-openai"
+)
+
+var _ = Describe("E2E test", func() {
+ Context("Generating", func() {
+ BeforeEach(func() {
+ //
+ })
+
+ // Check that the GPU was used
+ AfterEach(func() {
+ //
+ })
+
+ Context("text", func() {
+ It("correctly", func() {
+ model := "gpt-4"
+ resp, err := client.CreateChatCompletion(context.TODO(),
+ openai.ChatCompletionRequest{
+ Model: model, Messages: []openai.ChatCompletionMessage{
+ {
+ Role: "user",
+ Content: "How much is 2+2?",
+ },
+ }})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+ Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
+ })
+ })
+ Context("images", func() {
+ It("correctly", func() {
+ resp, err := client.CreateImage(context.TODO(),
+ openai.ImageRequest{
+ Prompt: "test",
+ Size: openai.CreateImageSize512x512,
+ //ResponseFormat: openai.CreateImageResponseFormatURL,
+ },
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
+ Expect(resp.Data[0].URL).To(ContainSubstring("http://localhost:8080"), fmt.Sprint(resp.Data[0].URL))
+ })
+ })
+ Context("embeddings", func() {
+ It("correctly", func() {
+ resp, err := client.CreateEmbeddings(context.TODO(),
+ openai.EmbeddingRequestStrings{
+ Input: []string{"doc"},
+ Model: openai.AdaEmbeddingV2,
+ },
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
+ Expect(resp.Data[0].Embedding).ToNot(BeEmpty())
+ })
+ })
+ Context("vision", func() {
+ It("correctly", func() {
+ model := "gpt-4-vision-preview"
+ resp, err := client.CreateChatCompletion(context.TODO(),
+ openai.ChatCompletionRequest{
+ Model: model, Messages: []openai.ChatCompletionMessage{
+ {
+
+ Role: "user",
+ MultiContent: []openai.ChatMessagePart{
+ {
+ Type: openai.ChatMessagePartTypeText,
+ Text: "What is in the image?",
+ },
+ {
+ Type: openai.ChatMessagePartTypeImageURL,
+ ImageURL: &openai.ChatMessageImageURL{
+ URL: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+ Detail: openai.ImageURLDetailLow,
+ },
+ },
+ },
+ },
+ }})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+ Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("wooden"), ContainSubstring("grass")), fmt.Sprint(resp.Choices[0].Message.Content))
+ })
+ })
+ Context("text to audio", func() {
+ It("correctly", func() {
+ res, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
+ Model: openai.TTSModel1,
+ Input: "Hello!",
+ Voice: openai.VoiceAlloy,
+ })
+ Expect(err).ToNot(HaveOccurred())
+ defer res.Close()
+
+ _, err = io.ReadAll(res)
+ Expect(err).ToNot(HaveOccurred())
+
+ })
+ })
+ Context("audio to text", func() {
+ It("correctly", func() {
+
+ downloadURL := "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"
+ file, err := downloadHttpFile(downloadURL)
+ Expect(err).ToNot(HaveOccurred())
+
+ req := openai.AudioRequest{
+ Model: openai.Whisper1,
+ FilePath: file,
+ }
+ resp, err := client.CreateTranscription(context.Background(), req)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(resp.Text).To(ContainSubstring("This is the"), fmt.Sprint(resp.Text))
+ })
+ })
+ })
+})
+
+func downloadHttpFile(url string) (string, error) {
+ resp, err := http.Get(url)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ tmpfile, err := os.CreateTemp("", "example")
+ if err != nil {
+ return "", err
+ }
+ defer tmpfile.Close()
+
+ _, err = io.Copy(tmpfile, resp.Body)
+ if err != nil {
+ return "", err
+ }
+
+ return tmpfile.Name(), nil
+}
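downloadHttpFile trusts whatever the server returns. A stricter variant, sketched here as an addition rather than the patch's own code, would reject non-200 responses before writing the body, so a CDN error page never reaches the transcription endpoint (fmt is already imported in this file):

    // Immediately after the http.Get error check:
    if resp.StatusCode != http.StatusOK {
        return "", fmt.Errorf("unexpected status %s fetching %s", resp.Status, url)
    }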
From 643d85d2cc3741d5e461302e7fc5c1f75bb2b8a0 Mon Sep 17 00:00:00 2001
From: Richard Palethorpe
Date: Fri, 22 Mar 2024 20:14:04 +0000
Subject: [PATCH 0027/2750] feat(stores): Vector store backend (#1795)
Add a simple vector store backend
Signed-off-by: Richard Palethorpe
---
.editorconfig | 31 +
Makefile | 13 +-
backend/backend.proto | 46 +-
backend/go/stores/debug.go | 14 +
backend/go/stores/main.go | 26 +
backend/go/stores/production.go | 7 +
backend/go/stores/store.go | 507 +++++++
core/backend/stores.go | 23 +
core/http/api.go | 7 +
core/http/api_test.go | 142 ++
core/http/endpoints/localai/stores.go | 121 ++
core/schema/localai.go | 37 +
docs/content/docs/features/stores.md | 97 ++
docs/content/docs/overview.md | 1 +
examples/semantic-todo/README.md | 15 +
examples/semantic-todo/go.mod | 18 +
examples/semantic-todo/go.sum | 50 +
examples/semantic-todo/main.go | 352 +++++
pkg/grpc/backend.go | 5 +
pkg/grpc/base/base.go | 16 +
pkg/grpc/client.go | 64 +
pkg/grpc/embed.go | 16 +
pkg/grpc/interface.go | 5 +
pkg/grpc/proto/backend.pb.go | 1426 +++++++++++++------
pkg/grpc/proto/backend_grpc.pb.go | 201 ++-
pkg/grpc/server.go | 48 +
pkg/model/initializers.go | 3 +
pkg/store/client.go | 155 ++
tests/integration/integration_suite_test.go | 17 +
tests/integration/stores_test.go | 228 +++
30 files changed, 3250 insertions(+), 441 deletions(-)
create mode 100644 .editorconfig
create mode 100644 backend/go/stores/debug.go
create mode 100644 backend/go/stores/main.go
create mode 100644 backend/go/stores/production.go
create mode 100644 backend/go/stores/store.go
create mode 100644 core/backend/stores.go
create mode 100644 core/http/endpoints/localai/stores.go
create mode 100644 docs/content/docs/features/stores.md
create mode 100644 examples/semantic-todo/README.md
create mode 100644 examples/semantic-todo/go.mod
create mode 100644 examples/semantic-todo/go.sum
create mode 100644 examples/semantic-todo/main.go
create mode 100644 pkg/store/client.go
create mode 100644 tests/integration/integration_suite_test.go
create mode 100644 tests/integration/stores_test.go
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..b66f3645
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,31 @@
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 2
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.go]
+indent_style = tab
+
+[Makefile]
+indent_style = tab
+
+[*.proto]
+indent_size = 2
+
+[*.py]
+indent_size = 4
+
+[*.js]
+indent_size = 2
+
+[*.yaml]
+indent_size = 2
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/Makefile b/Makefile
index 95af1936..94b5570c 100644
--- a/Makefile
+++ b/Makefile
@@ -159,6 +159,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
+ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
@@ -333,7 +334,7 @@ prepare-test: grpcs
test: prepare test-models/testmodel.ggml grpcs
@echo 'Running tests'
- export GO_TAGS="tts stablediffusion"
+ export GO_TAGS="tts stablediffusion debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -387,6 +388,11 @@ test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
+test-stores: backend-assets/grpc/local-store
+ mkdir -p tests/integration/backend-assets/grpc
+ cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
+
test-container:
docker build --target requirements -t local-ai-test-container .
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
@@ -536,6 +542,9 @@ backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+backend-assets/grpc/local-store: backend-assets/grpc
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+
grpcs: prepare $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
@@ -573,4 +582,4 @@ docker-image-intel-xpu:
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
- --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
\ No newline at end of file
+ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
diff --git a/backend/backend.proto b/backend/backend.proto
index 30e2f8b2..c3d3180b 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -18,6 +18,48 @@ service Backend {
rpc TTS(TTSRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
+
+ rpc StoresSet(StoresSetOptions) returns (Result) {}
+ rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
+ rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
+ rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
+}
+
+message StoresKey {
+ repeated float Floats = 1;
+}
+
+message StoresValue {
+ bytes Bytes = 1;
+}
+
+message StoresSetOptions {
+ repeated StoresKey Keys = 1;
+ repeated StoresValue Values = 2;
+}
+
+message StoresDeleteOptions {
+ repeated StoresKey Keys = 1;
+}
+
+message StoresGetOptions {
+ repeated StoresKey Keys = 1;
+}
+
+message StoresGetResult {
+ repeated StoresKey Keys = 1;
+ repeated StoresValue Values = 2;
+}
+
+message StoresFindOptions {
+ StoresKey Key = 1;
+ int32 TopK = 2;
+}
+
+message StoresFindResult {
+ repeated StoresKey Keys = 1;
+ repeated StoresValue Values = 2;
+ repeated float Similarities = 3;
}
message HealthMessage {}
@@ -121,7 +163,7 @@ message ModelOptions {
bool NoMulMatQ = 37;
string DraftModel = 39;
-
+
string AudioPath = 38;
// vllm
@@ -213,4 +255,4 @@ message StatusResponse {
}
State state = 1;
MemoryUsageData memory = 2;
-}
\ No newline at end of file
+}
diff --git a/backend/go/stores/debug.go b/backend/go/stores/debug.go
new file mode 100644
index 00000000..6f0b8ba8
--- /dev/null
+++ b/backend/go/stores/debug.go
@@ -0,0 +1,14 @@
+//go:build debug
+// +build debug
+
+package main
+
+import (
+ "github.com/rs/zerolog/log"
+)
+
+func assert(cond bool, msg string) {
+ if !cond {
+ log.Fatal().Stack().Msg(msg)
+ }
+}
diff --git a/backend/go/stores/main.go b/backend/go/stores/main.go
new file mode 100644
index 00000000..9a113d79
--- /dev/null
+++ b/backend/go/stores/main.go
@@ -0,0 +1,26 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each store
+
+import (
+ "flag"
+ "os"
+
+ grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/rs/zerolog"
+ "github.com/rs/zerolog/log"
+)
+
+var (
+ addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+ log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+
+ flag.Parse()
+
+ if err := grpc.StartServer(*addr, NewStore()); err != nil {
+ panic(err)
+ }
+}
diff --git a/backend/go/stores/production.go b/backend/go/stores/production.go
new file mode 100644
index 00000000..418b6397
--- /dev/null
+++ b/backend/go/stores/production.go
@@ -0,0 +1,7 @@
+//go:build !debug
+// +build !debug
+
+package main
+
+func assert(cond bool, msg string) {
+}
diff --git a/backend/go/stores/store.go b/backend/go/stores/store.go
new file mode 100644
index 00000000..9be31df8
--- /dev/null
+++ b/backend/go/stores/store.go
@@ -0,0 +1,507 @@
+package main
+
+// This is a wrapper to satisfy the gRPC service interface.
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
+import (
+ "container/heap"
+ "fmt"
+ "math"
+ "slices"
+
+ "github.com/go-skynet/LocalAI/pkg/grpc/base"
+ pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+
+ "github.com/rs/zerolog/log"
+)
+
+type Store struct {
+ base.SingleThread
+
+ // The sorted keys
+ keys [][]float32
+ // The sorted values
+ values [][]byte
+
+ // If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
+	// TODO: Should we instead normalize incoming keys when they are not?
+ keysAreNormalized bool
+ // The first key decides the length of the keys
+ keyLen int
+}
+
+// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
+// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
+type Pair struct {
+ Key []float32
+ Value []byte
+}
+
+func NewStore() *Store {
+ return &Store{
+ keys: make([][]float32, 0),
+ values: make([][]byte, 0),
+ keysAreNormalized: true,
+ keyLen: -1,
+ }
+}
+
+func compareSlices(k1, k2 []float32) int {
+ assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+ return slices.Compare(k1, k2)
+}
+
+func hasKey(unsortedSlice [][]float32, target []float32) bool {
+ return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
+ return compareSlices(k, target) == 0
+ })
+}
+
+func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
+ return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
+ return compareSlices(k, t)
+ })
+}
+
+func isSortedPairs(kvs []Pair) bool {
+ for i := 1; i < len(kvs); i++ {
+ if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
+ return false
+ }
+ }
+
+ return true
+}
+
+func isSortedKeys(keys [][]float32) bool {
+ for i := 1; i < len(keys); i++ {
+ if compareSlices(keys[i-1], keys[i]) > 0 {
+ return false
+ }
+ }
+
+ return true
+}
+
+func sortIntoKeySlices(keys []*pb.StoresKey) [][]float32 {
+ ks := make([][]float32, len(keys))
+
+ for i, k := range keys {
+ ks[i] = k.Floats
+ }
+
+ slices.SortFunc(ks, compareSlices)
+
+ assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
+ assert(isSortedKeys(ks), "keys are not sorted")
+
+ return ks
+}
+
+func (s *Store) Load(opts *pb.ModelOptions) error {
+ return nil
+}
+
+// Sort the incoming kvs and merge them with the existing sorted kvs
+func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
+ if len(opts.Keys) == 0 {
+ return fmt.Errorf("no keys to add")
+ }
+
+ if len(opts.Keys) != len(opts.Values) {
+ return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
+ }
+
+ if s.keyLen == -1 {
+ s.keyLen = len(opts.Keys[0].Floats)
+ } else {
+ if len(opts.Keys[0].Floats) != s.keyLen {
+ return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+ }
+ }
+
+ kvs := make([]Pair, len(opts.Keys))
+
+ for i, k := range opts.Keys {
+ if s.keysAreNormalized && !isNormalized(k.Floats) {
+ s.keysAreNormalized = false
+ var sample []float32
+			if len(k.Floats) > 5 {
+ sample = k.Floats[:5]
+ } else {
+ sample = k.Floats
+ }
+ log.Debug().Msgf("Key is not normalized: %v", sample)
+ }
+
+ kvs[i] = Pair{
+ Key: k.Floats,
+ Value: opts.Values[i].Bytes,
+ }
+ }
+
+ slices.SortFunc(kvs, func(a, b Pair) int {
+ return compareSlices(a.Key, b.Key)
+ })
+
+ assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
+ assert(isSortedPairs(kvs), "keys are not sorted")
+
+ l := len(kvs) + len(s.keys)
+ merge_ks := make([][]float32, 0, l)
+ merge_vs := make([][]byte, 0, l)
+
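+	// Two-pointer merge of the sorted incoming kvs with the already sorted
+	// store; when a key appears in both, the incoming value wins (upsert).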
+ i, j := 0, 0
+ for {
+ if i+j >= l {
+ break
+ }
+
+ if i >= len(kvs) {
+ merge_ks = append(merge_ks, s.keys[j])
+ merge_vs = append(merge_vs, s.values[j])
+ j++
+ continue
+ }
+
+ if j >= len(s.keys) {
+ merge_ks = append(merge_ks, kvs[i].Key)
+ merge_vs = append(merge_vs, kvs[i].Value)
+ i++
+ continue
+ }
+
+ c := compareSlices(kvs[i].Key, s.keys[j])
+ if c < 0 {
+ merge_ks = append(merge_ks, kvs[i].Key)
+ merge_vs = append(merge_vs, kvs[i].Value)
+ i++
+ } else if c > 0 {
+ merge_ks = append(merge_ks, s.keys[j])
+ merge_vs = append(merge_vs, s.values[j])
+ j++
+ } else {
+ merge_ks = append(merge_ks, kvs[i].Key)
+ merge_vs = append(merge_vs, kvs[i].Value)
+ i++
+ j++
+ }
+ }
+
+ assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
+ assert(isSortedKeys(merge_ks), "merge keys are not sorted")
+
+ s.keys = merge_ks
+ s.values = merge_vs
+
+ return nil
+}
+
+func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
+ if len(opts.Keys) == 0 {
+ return fmt.Errorf("no keys to delete")
+ }
+
+ if s.keyLen == -1 {
+ s.keyLen = len(opts.Keys[0].Floats)
+ } else {
+ if len(opts.Keys[0].Floats) != s.keyLen {
+ return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+ }
+ }
+
+	ks := sortIntoKeySlices(opts.Keys)
+
+ l := len(s.keys) - len(ks)
+ merge_ks := make([][]float32, 0, l)
+ merge_vs := make([][]byte, 0, l)
+
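+	// For each sorted key to delete, binary search the remaining tail of the
+	// store, copy everything before the match and skip over the matched entry.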
+ tail_ks := s.keys
+ tail_vs := s.values
+ for _, k := range ks {
+ j, found := findInSortedSlice(tail_ks, k)
+
+ if found {
+ merge_ks = append(merge_ks, tail_ks[:j]...)
+ merge_vs = append(merge_vs, tail_vs[:j]...)
+ tail_ks = tail_ks[j+1:]
+ tail_vs = tail_vs[j+1:]
+ } else {
+ assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
+ }
+
+ log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
+ }
+
+ merge_ks = append(merge_ks, tail_ks...)
+ merge_vs = append(merge_vs, tail_vs...)
+
+ assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
+
+ s.keys = merge_ks
+ s.values = merge_vs
+
+ assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
+ assert(isSortedKeys(s.keys), "keys are not sorted")
+ assert(func() bool {
+ for _, k := range ks {
+ if _, found := findInSortedSlice(s.keys, k); found {
+ return false
+ }
+ }
+ return true
+ }(), "Keys to delete still present")
+
+ if len(s.keys) != l {
+ log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
+ }
+
+ return nil
+}
+
+func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
+	if len(opts.Keys) == 0 {
+		return pb.StoresGetResult{}, fmt.Errorf("no keys to get")
+	}
+
+	pbKeys := make([]*pb.StoresKey, 0, len(opts.Keys))
+	pbValues := make([]*pb.StoresValue, 0, len(opts.Keys))
+	ks := sortIntoKeySlices(opts.Keys)
+
+ if len(s.keys) == 0 {
+ log.Debug().Msgf("Get: No keys in store")
+ }
+
+ if s.keyLen == -1 {
+ s.keyLen = len(opts.Keys[0].Floats)
+ } else {
+ if len(opts.Keys[0].Floats) != s.keyLen {
+			return pb.StoresGetResult{}, fmt.Errorf("trying to get a key of length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
+ }
+ }
+
+ tail_k := s.keys
+ tail_v := s.values
+ for i, k := range ks {
+ j, found := findInSortedSlice(tail_k, k)
+
+ if found {
+ pbKeys = append(pbKeys, &pb.StoresKey{
+ Floats: k,
+ })
+ pbValues = append(pbValues, &pb.StoresValue{
+ Bytes: tail_v[j],
+ })
+
+ tail_k = tail_k[j+1:]
+ tail_v = tail_v[j+1:]
+ } else {
+ assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: i=%d, %v", i, k))
+ }
+ }
+
+ if len(pbKeys) != len(opts.Keys) {
+ log.Debug().Msgf("Get: Some keys not found: len(pbKeys) = %d, len(opts.Keys) = %d, len(s.Keys) = %d", len(pbKeys), len(opts.Keys), len(s.keys))
+ }
+
+ return pb.StoresGetResult{
+ Keys: pbKeys,
+ Values: pbValues,
+ }, nil
+}
+
+func isNormalized(k []float32) bool {
+	var sum float64
+	for _, v := range k {
+		v64 := float64(v)
+		sum += v64 * v64
+	}
+
+	s := math.Sqrt(sum)
+
+	// A unit vector has an L2 norm of 1; allow a little rounding slop
+	return s >= 0.99 && s <= 1.01
+}
+
+// TODO: This we could replace with handwritten SIMD code
+func normalizedCosineSimilarity(k1, k2 []float32) float32 {
+ assert(len(k1) == len(k2), fmt.Sprintf("normalizedCosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+ var dot float32
+ for i := 0; i < len(k1); i++ {
+ dot += k1[i] * k2[i]
+ }
+
+ assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
+
+ // 2.0 * (1.0 - dot) would be the Euclidean distance
+ return dot
+}
+
+type PriorityItem struct {
+ Similarity float32
+ Key []float32
+ Value []byte
+}
+
+type PriorityQueue []*PriorityItem
+
+func (pq PriorityQueue) Len() int { return len(pq) }
+
+func (pq PriorityQueue) Less(i, j int) bool {
+	// Min-heap on similarity: the least similar item sits at the root so it is popped first once the queue exceeds TopK
+ return pq[i].Similarity < pq[j].Similarity
+}
+
+func (pq PriorityQueue) Swap(i, j int) {
+ pq[i], pq[j] = pq[j], pq[i]
+}
+
+func (pq *PriorityQueue) Push(x any) {
+ item := x.(*PriorityItem)
+ *pq = append(*pq, item)
+}
+
+func (pq *PriorityQueue) Pop() any {
+ old := *pq
+ n := len(old)
+ item := old[n-1]
+ *pq = old[0 : n-1]
+ return item
+}
+
+func (s *Store) StoresFindNormalized(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+ tk := opts.Key.Floats
+ top_ks := make(PriorityQueue, 0, int(opts.TopK))
+ heap.Init(&top_ks)
+
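+	// Scan every key while keeping a bounded min-heap: once it holds more than
+	// TopK items the least similar entry is popped, so the heap always contains
+	// the TopK most similar keys seen so far.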
+ for i, k := range s.keys {
+ sim := normalizedCosineSimilarity(tk, k)
+ heap.Push(&top_ks, &PriorityItem{
+ Similarity: sim,
+ Key: k,
+ Value: s.values[i],
+ })
+
+ if top_ks.Len() > int(opts.TopK) {
+ heap.Pop(&top_ks)
+ }
+ }
+
+ similarities := make([]float32, top_ks.Len())
+ pbKeys := make([]*pb.StoresKey, top_ks.Len())
+ pbValues := make([]*pb.StoresValue, top_ks.Len())
+
+ for i := top_ks.Len() - 1; i >= 0; i-- {
+ item := heap.Pop(&top_ks).(*PriorityItem)
+
+ similarities[i] = item.Similarity
+ pbKeys[i] = &pb.StoresKey{
+ Floats: item.Key,
+ }
+ pbValues[i] = &pb.StoresValue{
+ Bytes: item.Value,
+ }
+ }
+
+ return pb.StoresFindResult{
+ Keys: pbKeys,
+ Values: pbValues,
+ Similarities: similarities,
+ }, nil
+}
+
+func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
+ assert(len(k1) == len(k2), fmt.Sprintf("cosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
+
+ var dot, mag2 float64
+ for i := 0; i < len(k1); i++ {
+ dot += float64(k1[i] * k2[i])
+ mag2 += float64(k2[i] * k2[i])
+ }
+
+ sim := float32(dot / (mag1 * math.Sqrt(mag2)))
+
+ assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
+
+ return sim
+}
+
+func (s *Store) StoresFindFallback(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+ tk := opts.Key.Floats
+ top_ks := make(PriorityQueue, 0, int(opts.TopK))
+ heap.Init(&top_ks)
+
+ var mag1 float64
+ for _, v := range tk {
+ mag1 += float64(v * v)
+ }
+ mag1 = math.Sqrt(mag1)
+
+ for i, k := range s.keys {
+ dist := cosineSimilarity(tk, k, mag1)
+ heap.Push(&top_ks, &PriorityItem{
+ Similarity: dist,
+ Key: k,
+ Value: s.values[i],
+ })
+
+ if top_ks.Len() > int(opts.TopK) {
+ heap.Pop(&top_ks)
+ }
+ }
+
+ similarities := make([]float32, top_ks.Len())
+ pbKeys := make([]*pb.StoresKey, top_ks.Len())
+ pbValues := make([]*pb.StoresValue, top_ks.Len())
+
+ for i := top_ks.Len() - 1; i >= 0; i-- {
+ item := heap.Pop(&top_ks).(*PriorityItem)
+
+ similarities[i] = item.Similarity
+ pbKeys[i] = &pb.StoresKey{
+ Floats: item.Key,
+ }
+ pbValues[i] = &pb.StoresValue{
+ Bytes: item.Value,
+ }
+ }
+
+ return pb.StoresFindResult{
+ Keys: pbKeys,
+ Values: pbValues,
+ Similarities: similarities,
+ }, nil
+}
+
+func (s *Store) StoresFind(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
+ tk := opts.Key.Floats
+
+	if opts.TopK < 1 {
+		return pb.StoresFindResult{}, fmt.Errorf("opts.TopK = %d, must be >= 1", opts.TopK)
+	}
+
+	if s.keyLen == -1 {
+		s.keyLen = len(tk)
+	} else if len(tk) != s.keyLen {
+		return pb.StoresFindResult{}, fmt.Errorf("trying to find a key of length %d when existing length is %d", len(tk), s.keyLen)
+	}
+
+ if s.keysAreNormalized && isNormalized(tk) {
+ return s.StoresFindNormalized(opts)
+ } else {
+ if s.keysAreNormalized {
+ var sample []float32
+			if len(tk) > 5 {
+ sample = tk[:5]
+ } else {
+ sample = tk
+ }
+ log.Debug().Msgf("Trying to compare non-normalized key with normalized keys: %v", sample)
+ }
+
+ return s.StoresFindFallback(opts)
+ }
+}
diff --git a/core/backend/stores.go b/core/backend/stores.go
new file mode 100644
index 00000000..7b69d1bd
--- /dev/null
+++ b/core/backend/stores.go
@@ -0,0 +1,23 @@
+package backend
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+
+ "github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
+ if storeName == "" {
+ storeName = "default"
+ }
+
+ sc := []model.Option{
+ model.WithBackendString(model.LocalStoreBackend),
+ model.WithAssetDir(appConfig.AssetsDestination),
+ model.WithModel(storeName),
+ }
+
+ return sl.BackendLoader(sc...)
+}
+
diff --git a/core/http/api.go b/core/http/api.go
index 8578b89e..039e835b 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -172,6 +172,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
+ // Stores
+ sl := model.NewModelLoader("")
+ app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
+ app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
+ app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
+ app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
+
// openAI compatible API endpoint
// chat
diff --git a/core/http/api_test.go b/core/http/api_test.go
index ca69e8bf..804c15fe 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -15,6 +15,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
. "github.com/go-skynet/LocalAI/core/http"
+ "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/startup"
"github.com/go-skynet/LocalAI/pkg/downloader"
@@ -122,6 +123,75 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
return
}
+func postRequestJSON[B any](url string, bodyJson *B) error {
+ payload, err := json.Marshal(bodyJson)
+ if err != nil {
+ return err
+ }
+
+ GinkgoWriter.Printf("POST %s: %s\n", url, string(payload))
+
+ req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
+ if err != nil {
+ return err
+ }
+
+ req.Header.Set("Content-Type", "application/json")
+
+ client := &http.Client{}
+ resp, err := client.Do(req)
+ if err != nil {
+ return err
+ }
+
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+
+ if resp.StatusCode < 200 || resp.StatusCode >= 400 {
+ return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *B2) error {
+ payload, err := json.Marshal(reqJson)
+ if err != nil {
+ return err
+ }
+
+ GinkgoWriter.Printf("POST %s: %s\n", url, string(payload))
+
+ req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
+ if err != nil {
+ return err
+ }
+
+ req.Header.Set("Content-Type", "application/json")
+
+ client := &http.Client{}
+ resp, err := client.Do(req)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+
+ if resp.StatusCode < 200 || resp.StatusCode >= 400 {
+ return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
+ }
+
+ return json.Unmarshal(body, respJson)
+}
+
//go:embed backend-assets/*
var backendAssets embed.FS
@@ -836,6 +906,78 @@ var _ = Describe("API test", func() {
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
})
+
+ // See tests/integration/stores_test
+ Context("Stores", Label("stores"), func() {
+
+ It("sets, gets, finds and deletes entries", func() {
+ ks := [][]float32{
+ {0.1, 0.2, 0.3},
+ {0.4, 0.5, 0.6},
+ {0.7, 0.8, 0.9},
+ }
+ vs := []string{
+ "test1",
+ "test2",
+ "test3",
+ }
+ setBody := schema.StoresSet{
+ Keys: ks,
+ Values: vs,
+ }
+
+ url := "http://127.0.0.1:9090/stores/"
+ err := postRequestJSON(url+"set", &setBody)
+ Expect(err).ToNot(HaveOccurred())
+
+ getBody := schema.StoresGet{
+ Keys: ks,
+ }
+ var getRespBody schema.StoresGetResponse
+ err = postRequestResponseJSON(url+"get", &getBody, &getRespBody)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(getRespBody.Keys)).To(Equal(len(ks)))
+
+ for i, v := range getRespBody.Keys {
+ if v[0] == 0.1 {
+ Expect(getRespBody.Values[i]).To(Equal("test1"))
+ } else if v[0] == 0.4 {
+ Expect(getRespBody.Values[i]).To(Equal("test2"))
+ } else {
+ Expect(getRespBody.Values[i]).To(Equal("test3"))
+ }
+ }
+
+ deleteBody := schema.StoresDelete{
+ Keys: [][]float32{
+ {0.1, 0.2, 0.3},
+ },
+ }
+ err = postRequestJSON(url+"delete", &deleteBody)
+ Expect(err).ToNot(HaveOccurred())
+
+ findBody := schema.StoresFind{
+ Key: []float32{0.1, 0.3, 0.7},
+ Topk: 10,
+ }
+
+ var findRespBody schema.StoresFindResponse
+ err = postRequestResponseJSON(url+"find", &findBody, &findRespBody)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(findRespBody.Keys)).To(Equal(2))
+
+ for i, v := range findRespBody.Keys {
+ if v[0] == 0.4 {
+ Expect(findRespBody.Values[i]).To(Equal("test2"))
+ } else {
+ Expect(findRespBody.Values[i]).To(Equal("test3"))
+ }
+
+ Expect(findRespBody.Similarities[i]).To(BeNumerically(">=", -1))
+ Expect(findRespBody.Similarities[i]).To(BeNumerically("<=", 1))
+ }
+ })
+ })
})
Context("Config file", func() {
diff --git a/core/http/endpoints/localai/stores.go b/core/http/endpoints/localai/stores.go
new file mode 100644
index 00000000..c8abfdb1
--- /dev/null
+++ b/core/http/endpoints/localai/stores.go
@@ -0,0 +1,121 @@
+package localai
+
+import (
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/store"
+ "github.com/gofiber/fiber/v2"
+)
+
+func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ input := new(schema.StoresSet)
+
+ if err := c.BodyParser(input); err != nil {
+ return err
+ }
+
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ if err != nil {
+ return err
+ }
+
+ vals := make([][]byte, len(input.Values))
+ for i, v := range input.Values {
+ vals[i] = []byte(v)
+ }
+
+ err = store.SetCols(c.Context(), sb, input.Keys, vals)
+ if err != nil {
+ return err
+ }
+
+ return c.Send(nil)
+ }
+}
+
+func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ input := new(schema.StoresDelete)
+
+ if err := c.BodyParser(input); err != nil {
+ return err
+ }
+
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ if err != nil {
+ return err
+ }
+
+ if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
+ return err
+ }
+
+ return c.Send(nil)
+ }
+}
+
+func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ input := new(schema.StoresGet)
+
+ if err := c.BodyParser(input); err != nil {
+ return err
+ }
+
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ if err != nil {
+ return err
+ }
+
+ keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
+ if err != nil {
+ return err
+ }
+
+ res := schema.StoresGetResponse{
+ Keys: keys,
+ Values: make([]string, len(vals)),
+ }
+
+ for i, v := range vals {
+ res.Values[i] = string(v)
+ }
+
+ return c.JSON(res)
+ }
+}
+
+func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ input := new(schema.StoresFind)
+
+ if err := c.BodyParser(input); err != nil {
+ return err
+ }
+
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ if err != nil {
+ return err
+ }
+
+ keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
+ if err != nil {
+ return err
+ }
+
+ res := schema.StoresFindResponse{
+ Keys: keys,
+ Values: make([]string, len(vals)),
+ Similarities: similarities,
+ }
+
+ for i, v := range vals {
+ res.Values[i] = string(v)
+ }
+
+ return c.JSON(res)
+ }
+}
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 5f5fd41e..e9b61cf3 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -20,3 +20,40 @@ type TTSRequest struct {
Voice string `json:"voice" yaml:"voice"`
Backend string `json:"backend" yaml:"backend"`
}
+
+type StoresSet struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+ Values []string `json:"values" yaml:"values"`
+}
+
+type StoresDelete struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Keys [][]float32 `json:"keys"`
+}
+
+type StoresGet struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+}
+
+type StoresGetResponse struct {
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+ Values []string `json:"values" yaml:"values"`
+}
+
+type StoresFind struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Key []float32 `json:"key" yaml:"key"`
+ Topk int `json:"topk" yaml:"topk"`
+}
+
+type StoresFindResponse struct {
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+ Values []string `json:"values" yaml:"values"`
+ Similarities []float32 `json:"similarities" yaml:"similarities"`
+}
diff --git a/docs/content/docs/features/stores.md b/docs/content/docs/features/stores.md
new file mode 100644
index 00000000..18fc750c
--- /dev/null
+++ b/docs/content/docs/features/stores.md
@@ -0,0 +1,97 @@
+
++++
+disableToc = false
+title = "💾 Stores"
+
+weight = 18
+url = '/stores'
++++
+
+Stores are an experimental feature for querying data using similarity search. It is
+a low-level API that consists of only `get`, `set`, `delete` and `find`.
+
+For example, suppose you have an embedding of some text and want to find text with similar
+embeddings: you can create embeddings for chunks of all your text and then compare them against
+the embedding of the text you are searching for.
+
+An embedding here means a vector of numbers that represents some information about the text.
+Embeddings are created by an AI model such as BERT, or by a more traditional method such as word
+frequency.
+
+Previously you would have had to integrate directly with an external vector database or library.
+With the stores feature you can now do it through the LocalAI API.
+
+Note however that a similarity search on embeddings is just one way to do retrieval. A higher-level
+API can take this into account, so this may not be the best place to start.
+
+## API overview
+
+There is an internal gRPC API and an external-facing HTTP JSON API. We'll only discuss the external
+HTTP API here; it mirrors the gRPC API. Consult `pkg/store/client` for internal usage.
+
+Everything is in columnar format: instead of an array of objects that each hold a key and a value,
+you get two separate arrays of keys and values, e.g. `{"keys": [[0.1, 0.2]], "values": ["foo"]}`.
+
+Keys are arrays of floating point numbers with a maximum width of 32 bits. Values are strings (in gRPC they are bytes).
+
+The key vectors must all be the same length, and it's best for search performance if they are normalized. When
+keys are added, the store detects whether they are normalized and what length they are.
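+
+Below is a minimal, illustrative Go sketch of normalizing a vector to unit length (an L2 norm of 1)
+before insertion; the `normalize` helper is an assumption for this example and not part of LocalAI:
+
+```go
+package main
+
+import (
+	"fmt"
+	"math"
+)
+
+// normalize rescales v in place to unit length. The store detects keys that
+// are not normalized, but it does not rescale them for you.
+func normalize(v []float32) {
+	var sum float64
+	for _, x := range v {
+		sum += float64(x) * float64(x)
+	}
+	n := float32(math.Sqrt(sum))
+	if n == 0 {
+		return // leave the zero vector untouched
+	}
+	for i := range v {
+		v[i] /= n
+	}
+}
+
+func main() {
+	v := []float32{3, 4}
+	normalize(v)
+	fmt.Println(v) // [0.6 0.8]
+}
+```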
+
+All endpoints accept a `store` field which specifies which store to operate on. Presently they are created
+on the fly and there is only one store backend so no configuration is required.
+
+## Set
+
+To set some keys you can do
+
+```
+curl -X POST http://localhost:8080/stores/set \
+ -H "Content-Type: application/json" \
+ -d '{"keys": [[0.1, 0.2], [0.3, 0.4]], "values": ["foo", "bar"]}'
+```
+
+Setting the same keys again will update their values.
+
+On success 200 OK is returned with no body.
+
+## Get
+
+To get some keys you can do
+
+```
+curl -X POST http://localhost:8080/stores/get \
+ -H "Content-Type: application/json" \
+ -d '{"keys": [[0.1, 0.2]]}'
+```
+
+Both the keys and values are returned, e.g.: `{"keys":[[0.1,0.2]],"values":["foo"]}`
+
+The order of the keys is not preserved! If a key does not exist then nothing is returned.
+
+## Delete
+
+To delete keys and values you can do
+
+```
+curl -X POST http://localhost:8080/stores/delete \
+ -H "Content-Type: application/json" \
+ -d '{"keys": [[0.1, 0.2]]}'
+```
+
+If a key doesn't exist then it is ignored.
+
+On success 200 OK is returned with no body.
+
+## Find
+
+To do a similarity search you can do
+
+```
+curl -X POST http://localhost:8080/stores/find \
+ -H "Content-Type: application/json" \
+ -d '{"topk": 2, "key": [0.2, 0.1]}'
+```
+
+`topk` limits the number of results returned. The response has the same shape as `get`,
+except that it also includes an array of `similarities`, where `1.0` is the maximum similarity.
+Results are returned in order from most similar to least.
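+
+## Example client
+
+Below is a minimal Go sketch that drives the `set` and `find` endpoints documented above. It
+assumes a LocalAI instance on `localhost:8080`; the struct and helper names (`storesSet`, `post`
+and friends) are illustrative, and only the JSON fields shown in the examples above are relied upon:
+
+```go
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"net/http"
+)
+
+type storesSet struct {
+	Keys   [][]float32 `json:"keys"`
+	Values []string    `json:"values"`
+}
+
+type storesFind struct {
+	Key  []float32 `json:"key"`
+	Topk int       `json:"topk"`
+}
+
+type storesFindResponse struct {
+	Keys         [][]float32 `json:"keys"`
+	Values       []string    `json:"values"`
+	Similarities []float32   `json:"similarities"`
+}
+
+// post marshals in, POSTs it as JSON and, when out is non-nil, decodes the response into it.
+func post(url string, in, out any) error {
+	payload, err := json.Marshal(in)
+	if err != nil {
+		return err
+	}
+	resp, err := http.Post(url, "application/json", bytes.NewReader(payload))
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+	}
+	if out == nil {
+		return nil
+	}
+	return json.NewDecoder(resp.Body).Decode(out)
+}
+
+func main() {
+	base := "http://localhost:8080/stores/"
+
+	set := storesSet{
+		Keys:   [][]float32{{0.1, 0.2}, {0.3, 0.4}},
+		Values: []string{"foo", "bar"},
+	}
+	if err := post(base+"set", set, nil); err != nil {
+		panic(err)
+	}
+
+	find := storesFind{Key: []float32{0.2, 0.1}, Topk: 2}
+	var res storesFindResponse
+	if err := post(base+"find", find, &res); err != nil {
+		panic(err)
+	}
+	fmt.Println(res.Values, res.Similarities)
+}
+```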
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 3ac21e94..f78a9be0 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -73,6 +73,7 @@ Note that this started just as a fun weekend project by [mudler](https://github.
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
+- 💾 [Stores](https://localai.io/features/stores)
## Contribute and help
diff --git a/examples/semantic-todo/README.md b/examples/semantic-todo/README.md
new file mode 100644
index 00000000..ec9e19b9
--- /dev/null
+++ b/examples/semantic-todo/README.md
@@ -0,0 +1,15 @@
+This demonstrates the vector store backend in its simplest form.
+You can add tasks and then search/sort them using the TUI.
+
+To build and run:
+
+```bash
+$ go get .
+$ go run .
+```
+
+A separate LocalAI instance is of course required. For example:
+
+```bash
+$ docker run -e DEBUG=true --rm -it -p 8080:8080 bert-cpp
+```
diff --git a/examples/semantic-todo/go.mod b/examples/semantic-todo/go.mod
new file mode 100644
index 00000000..7869e329
--- /dev/null
+++ b/examples/semantic-todo/go.mod
@@ -0,0 +1,18 @@
+module semantic-todo
+
+go 1.21.6
+
+require (
+ github.com/gdamore/tcell/v2 v2.7.1
+ github.com/rivo/tview v0.0.0-20240307173318-e804876934a1
+)
+
+require (
+ github.com/gdamore/encoding v1.0.0 // indirect
+ github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
+ github.com/mattn/go-runewidth v0.0.15 // indirect
+ github.com/rivo/uniseg v0.4.7 // indirect
+ golang.org/x/sys v0.17.0 // indirect
+ golang.org/x/term v0.17.0 // indirect
+ golang.org/x/text v0.14.0 // indirect
+)
diff --git a/examples/semantic-todo/go.sum b/examples/semantic-todo/go.sum
new file mode 100644
index 00000000..320d4060
--- /dev/null
+++ b/examples/semantic-todo/go.sum
@@ -0,0 +1,50 @@
+github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko=
+github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
+github.com/gdamore/tcell/v2 v2.7.1 h1:TiCcmpWHiAU7F0rA2I3S2Y4mmLmO9KHxJ7E1QhYzQbc=
+github.com/gdamore/tcell/v2 v2.7.1/go.mod h1:dSXtXTSK0VsW1biw65DZLZ2NKr7j0qP/0J7ONmsraWg=
+github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
+github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
+github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 h1:bWLHTRekAy497pE7+nXSuzXwwFHI0XauRzz6roUvY+s=
+github.com/rivo/tview v0.0.0-20240307173318-e804876934a1/go.mod h1:02iFIz7K/A9jGCvrizLPvoqr4cEIx7q54RH5Qudkrss=
+github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/examples/semantic-todo/main.go b/examples/semantic-todo/main.go
new file mode 100644
index 00000000..371fe6b9
--- /dev/null
+++ b/examples/semantic-todo/main.go
@@ -0,0 +1,352 @@
+package main
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+
+ "github.com/gdamore/tcell/v2"
+ "github.com/rivo/tview"
+)
+
+const (
+ localAI string = "http://localhost:8080"
+	rootStatus  string = "[::b]<space>[::-]: Add Task [::b]/[::-]: Search Task [::b]<esc>[::-]: Exit"
+	inputStatus string = "Press [::b]<enter>[::-] to submit the task, [::b]<esc>[::-] to cancel"
+)
+
+type Task struct {
+ Description string
+ Similarity float32
+}
+
+type AppState int
+
+const (
+ StateRoot AppState = iota
+ StateInput
+ StateSearch
+)
+
+type App struct {
+ state AppState
+ tasks []Task
+ app *tview.Application
+ flex *tview.Flex
+ table *tview.Table
+}
+
+func NewApp() *App {
+ return &App{
+ state: StateRoot,
+ tasks: []Task{
+ {Description: "Take the dog for a walk (after I get a dog)"},
+ {Description: "Go to the toilet"},
+ {Description: "Allow TODOs to be marked completed or removed"},
+ },
+ }
+}
+
+func getEmbeddings(description string) ([]float32, error) {
+ // Define the request payload
+ payload := map[string]interface{}{
+ "model": "bert-cpp-minilm-v6",
+ "input": description,
+ }
+
+ // Marshal the payload into JSON
+ jsonPayload, err := json.Marshal(payload)
+ if err != nil {
+ return nil, err
+ }
+
+ // Make the HTTP request to the local OpenAI embeddings API
+ resp, err := http.Post(localAI+"/embeddings", "application/json", bytes.NewBuffer(jsonPayload))
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ // Check if the request was successful
+ if resp.StatusCode != http.StatusOK {
+ return nil, fmt.Errorf("request to embeddings API failed with status code: %d", resp.StatusCode)
+ }
+
+ // Parse the response body
+ var result struct {
+ Data []struct {
+ Embedding []float32 `json:"embedding"`
+ } `json:"data"`
+ }
+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+ return nil, err
+ }
+
+ // Return the embedding
+ if len(result.Data) > 0 {
+ return result.Data[0].Embedding, nil
+ }
+ return nil, errors.New("no embedding received from API")
+}
+
+type StoresSet struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+ Values []string `json:"values" yaml:"values"`
+}
+
+func postTasksToExternalService(tasks []Task) error {
+ keys := make([][]float32, 0, len(tasks))
+ // Get the embeddings for the task description
+ for _, task := range tasks {
+ embedding, err := getEmbeddings(task.Description)
+ if err != nil {
+ return err
+ }
+ keys = append(keys, embedding)
+ }
+
+ values := make([]string, 0, len(tasks))
+ for _, task := range tasks {
+ values = append(values, task.Description)
+ }
+
+ // Construct the StoresSet object
+ storesSet := StoresSet{
+ Store: "tasks_store", // Assuming you have a specific store name
+ Keys: keys,
+ Values: values,
+ }
+
+ // Marshal the StoresSet object into JSON
+ jsonData, err := json.Marshal(storesSet)
+ if err != nil {
+ return err
+ }
+
+ // Make the HTTP POST request to the external service
+ resp, err := http.Post(localAI+"/stores/set", "application/json", bytes.NewBuffer(jsonData))
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ // Check if the request was successful
+ if resp.StatusCode != http.StatusOK {
+ // read resp body into string
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+ return fmt.Errorf("store request failed with status code: %d: %s", resp.StatusCode, body)
+ }
+
+ return nil
+}
+
+type StoresFind struct {
+ Store string `json:"store,omitempty" yaml:"store,omitempty"`
+
+ Key []float32 `json:"key" yaml:"key"`
+ Topk int `json:"topk" yaml:"topk"`
+}
+
+type StoresFindResponse struct {
+ Keys [][]float32 `json:"keys" yaml:"keys"`
+ Values []string `json:"values" yaml:"values"`
+ Similarities []float32 `json:"similarities" yaml:"similarities"`
+}
+
+func findSimilarTexts(inputText string, topk int) (StoresFindResponse, error) {
+ // Initialize an empty response object
+ response := StoresFindResponse{}
+
+ // Get the embedding for the input text
+ embedding, err := getEmbeddings(inputText)
+ if err != nil {
+ return response, err
+ }
+
+ // Construct the StoresFind object
+ storesFind := StoresFind{
+ Store: "tasks_store", // Assuming you have a specific store name
+ Key: embedding,
+ Topk: topk,
+ }
+
+ // Marshal the StoresFind object into JSON
+ jsonData, err := json.Marshal(storesFind)
+ if err != nil {
+ return response, err
+ }
+
+ // Make the HTTP POST request to the external service's /stores/find endpoint
+ resp, err := http.Post(localAI+"/stores/find", "application/json", bytes.NewBuffer(jsonData))
+ if err != nil {
+ return response, err
+ }
+ defer resp.Body.Close()
+
+ // Check if the request was successful
+ if resp.StatusCode != http.StatusOK {
+ return response, fmt.Errorf("request to /stores/find failed with status code: %d", resp.StatusCode)
+ }
+
+ // Parse the response body to retrieve similar texts and similarities
+ if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+ return response, err
+ }
+
+ return response, nil
+}
+
+func (app *App) updateUI() {
+ // Clear the flex layout
+ app.flex.Clear()
+ app.flex.SetDirection(tview.FlexColumn)
+ app.flex.AddItem(nil, 0, 1, false)
+
+ midCol := tview.NewFlex()
+ midCol.SetDirection(tview.FlexRow)
+ midCol.AddItem(nil, 0, 1, false)
+
+ // Create a new table.
+ app.table.Clear()
+ app.table.SetBorders(true)
+
+ // Set table headers
+ app.table.SetCell(0, 0, tview.NewTableCell("Description").SetAlign(tview.AlignLeft).SetExpansion(1).SetAttributes(tcell.AttrBold))
+ app.table.SetCell(0, 1, tview.NewTableCell("Similarity").SetAlign(tview.AlignCenter).SetExpansion(0).SetAttributes(tcell.AttrBold))
+
+ // Add the tasks to the table.
+ for i, task := range app.tasks {
+ row := i + 1
+ app.table.SetCell(row, 0, tview.NewTableCell(task.Description))
+ app.table.SetCell(row, 1, tview.NewTableCell(fmt.Sprintf("%.2f", task.Similarity)))
+ }
+
+ if app.state == StateInput {
+ inputField := tview.NewInputField()
+ inputField.
+ SetLabel("New Task: ").
+ SetFieldWidth(0).
+ SetDoneFunc(func(key tcell.Key) {
+ if key == tcell.KeyEnter {
+ task := Task{Description: inputField.GetText()}
+ app.tasks = append(app.tasks, task)
+ app.state = StateRoot
+ postTasksToExternalService([]Task{task})
+ }
+ app.updateUI()
+ })
+ midCol.AddItem(inputField, 3, 2, true)
+ app.app.SetFocus(inputField)
+ } else if app.state == StateSearch {
+ searchField := tview.NewInputField()
+ searchField.SetLabel("Search: ").
+ SetFieldWidth(0).
+ SetDoneFunc(func(key tcell.Key) {
+ if key == tcell.KeyEnter {
+ similar, err := findSimilarTexts(searchField.GetText(), 100)
+ if err != nil {
+ panic(err)
+ }
+ app.tasks = make([]Task, len(similar.Keys))
+ for i, v := range similar.Values {
+ app.tasks[i] = Task{Description: v, Similarity: similar.Similarities[i]}
+ }
+ }
+ app.updateUI()
+ })
+ midCol.AddItem(searchField, 3, 2, true)
+ app.app.SetFocus(searchField)
+ } else {
+ midCol.AddItem(nil, 3, 1, false)
+ }
+
+ midCol.AddItem(app.table, 0, 2, true)
+
+ // Add the status bar to the flex layout
+ statusBar := tview.NewTextView().
+ SetText(rootStatus).
+ SetDynamicColors(true).
+ SetTextAlign(tview.AlignCenter)
+ if app.state == StateInput {
+ statusBar.SetText(inputStatus)
+ }
+ midCol.AddItem(statusBar, 1, 1, false)
+ midCol.AddItem(nil, 0, 1, false)
+
+ app.flex.AddItem(midCol, 0, 10, true)
+ app.flex.AddItem(nil, 0, 1, false)
+
+ // Set the flex as the root element
+ app.app.SetRoot(app.flex, true)
+}
+
+func main() {
+ app := NewApp()
+ tApp := tview.NewApplication()
+ flex := tview.NewFlex().SetDirection(tview.FlexRow)
+ table := tview.NewTable()
+
+ app.app = tApp
+ app.flex = flex
+ app.table = table
+
+ app.updateUI() // Initial UI setup
+
+ app.app.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
+ switch app.state {
+ case StateRoot:
+ // Handle key events when in the root state
+ switch event.Key() {
+ case tcell.KeyRune:
+ switch event.Rune() {
+ case ' ':
+ app.state = StateInput
+ app.updateUI()
+ return nil // Event is handled
+ case '/':
+ app.state = StateSearch
+ app.updateUI()
+ return nil // Event is handled
+ }
+ }
+
+ case StateInput:
+ // Handle key events when in the input state
+ if event.Key() == tcell.KeyEsc {
+ // Exit input state without adding a task
+ app.state = StateRoot
+ app.updateUI()
+ return nil // Event is handled
+ }
+
+ case StateSearch:
+ // Handle key events when in the search state
+ if event.Key() == tcell.KeyEsc {
+ // Exit search state
+ app.state = StateRoot
+ app.updateUI()
+ return nil // Event is handled
+ }
+ }
+
+ // Return the event for further processing by tview
+ return event
+ })
+
+ if err := postTasksToExternalService(app.tasks); err != nil {
+ panic(err)
+ }
+
+ // Start the application
+ if err := app.app.Run(); err != nil {
+ panic(err)
+ }
+}
diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go
index 22933d58..8fb8c39d 100644
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -44,4 +44,9 @@ type Backend interface {
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)
+
+ StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
+ StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error)
+ StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
+ StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
}
diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go
index 89c8785e..0af5d94f 100644
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -72,6 +72,22 @@ func (llm *Base) Status() (pb.StatusResponse, error) {
}, nil
}
+func (llm *Base) StoresSet(*pb.StoresSetOptions) error {
+ return fmt.Errorf("unimplemented")
+}
+
+func (llm *Base) StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) {
+ return pb.StoresGetResult{}, fmt.Errorf("unimplemented")
+}
+
+func (llm *Base) StoresDelete(*pb.StoresDeleteOptions) error {
+ return fmt.Errorf("unimplemented")
+}
+
+func (llm *Base) StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error) {
+ return pb.StoresFindResult{}, fmt.Errorf("unimplemented")
+}
+
func memoryUsage() *pb.MemoryUsageData {
mud := pb.MemoryUsageData{
Breakdown: make(map[string]uint64),
diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go
index 9058db05..882db12a 100644
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -291,3 +291,67 @@ func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
client := pb.NewBackendClient(conn)
return client.Status(ctx, &pb.HealthMessage{})
}
+
+func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+ if !c.parallel {
+ c.opMutex.Lock()
+ defer c.opMutex.Unlock()
+ }
+ c.setBusy(true)
+ defer c.setBusy(false)
+ conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
+ if err != nil {
+ return nil, err
+ }
+ defer conn.Close()
+ client := pb.NewBackendClient(conn)
+ return client.StoresSet(ctx, in, opts...)
+}
+
+func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+ if !c.parallel {
+ c.opMutex.Lock()
+ defer c.opMutex.Unlock()
+ }
+ c.setBusy(true)
+ defer c.setBusy(false)
+ conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
+ if err != nil {
+ return nil, err
+ }
+ defer conn.Close()
+ client := pb.NewBackendClient(conn)
+ return client.StoresDelete(ctx, in, opts...)
+}
+
+func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
+ if !c.parallel {
+ c.opMutex.Lock()
+ defer c.opMutex.Unlock()
+ }
+ c.setBusy(true)
+ defer c.setBusy(false)
+ conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
+ if err != nil {
+ return nil, err
+ }
+ defer conn.Close()
+ client := pb.NewBackendClient(conn)
+ return client.StoresGet(ctx, in, opts...)
+}
+
+func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
+ if !c.parallel {
+ c.opMutex.Lock()
+ defer c.opMutex.Unlock()
+ }
+ c.setBusy(true)
+ defer c.setBusy(false)
+ conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
+ if err != nil {
+ return nil, err
+ }
+ defer conn.Close()
+ client := pb.NewBackendClient(conn)
+ return client.StoresFind(ctx, in, opts...)
+}
diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go
index 228b1df5..73b185a3 100644
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -85,6 +85,22 @@ func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) {
return e.s.Status(ctx, &pb.HealthMessage{})
}
+func (e *embedBackend) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+ return e.s.StoresSet(ctx, in)
+}
+
+func (e *embedBackend) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+ return e.s.StoresDelete(ctx, in)
+}
+
+func (e *embedBackend) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
+ return e.s.StoresGet(ctx, in)
+}
+
+func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
+ return e.s.StoresFind(ctx, in)
+}
+
type embedBackendServerStream struct {
ctx context.Context
fn func(s []byte)
diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go
index 1cc7cb3d..4d06544d 100644
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -19,6 +19,11 @@ type LLM interface {
TTS(*pb.TTSRequest) error
TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
Status() (pb.StatusResponse, error)
+
+ StoresSet(*pb.StoresSetOptions) error
+ StoresDelete(*pb.StoresDeleteOptions) error
+ StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error)
+ StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)
}
func newReply(s string) *pb.Reply {
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
index 48551499..cc687577 100644
--- a/pkg/grpc/proto/backend.pb.go
+++ b/pkg/grpc/proto/backend.pb.go
@@ -1,6 +1,6 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
-// protoc-gen-go v1.26.0
+// protoc-gen-go v1.31.0
// protoc v4.23.4
// source: backend.proto
@@ -69,7 +69,423 @@ func (x StatusResponse_State) Number() protoreflect.EnumNumber {
// Deprecated: Use StatusResponse_State.Descriptor instead.
func (StatusResponse_State) EnumDescriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{13, 0}
+ return file_backend_proto_rawDescGZIP(), []int{21, 0}
+}
+
+type StoresKey struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Floats []float32 `protobuf:"fixed32,1,rep,packed,name=Floats,proto3" json:"Floats,omitempty"`
+}
+
+func (x *StoresKey) Reset() {
+ *x = StoresKey{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[0]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresKey) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresKey) ProtoMessage() {}
+
+func (x *StoresKey) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[0]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresKey.ProtoReflect.Descriptor instead.
+func (*StoresKey) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *StoresKey) GetFloats() []float32 {
+ if x != nil {
+ return x.Floats
+ }
+ return nil
+}
+
+type StoresValue struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Bytes []byte `protobuf:"bytes,1,opt,name=Bytes,proto3" json:"Bytes,omitempty"`
+}
+
+func (x *StoresValue) Reset() {
+ *x = StoresValue{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[1]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresValue) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresValue) ProtoMessage() {}
+
+func (x *StoresValue) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[1]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresValue.ProtoReflect.Descriptor instead.
+func (*StoresValue) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *StoresValue) GetBytes() []byte {
+ if x != nil {
+ return x.Bytes
+ }
+ return nil
+}
+
+type StoresSetOptions struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
+ Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
+}
+
+func (x *StoresSetOptions) Reset() {
+ *x = StoresSetOptions{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[2]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresSetOptions) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresSetOptions) ProtoMessage() {}
+
+func (x *StoresSetOptions) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[2]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresSetOptions.ProtoReflect.Descriptor instead.
+func (*StoresSetOptions) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{2}
+}
+
+func (x *StoresSetOptions) GetKeys() []*StoresKey {
+ if x != nil {
+ return x.Keys
+ }
+ return nil
+}
+
+func (x *StoresSetOptions) GetValues() []*StoresValue {
+ if x != nil {
+ return x.Values
+ }
+ return nil
+}
+
+type StoresDeleteOptions struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
+}
+
+func (x *StoresDeleteOptions) Reset() {
+ *x = StoresDeleteOptions{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[3]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresDeleteOptions) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresDeleteOptions) ProtoMessage() {}
+
+func (x *StoresDeleteOptions) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[3]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresDeleteOptions.ProtoReflect.Descriptor instead.
+func (*StoresDeleteOptions) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{3}
+}
+
+func (x *StoresDeleteOptions) GetKeys() []*StoresKey {
+ if x != nil {
+ return x.Keys
+ }
+ return nil
+}
+
+type StoresGetOptions struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
+}
+
+func (x *StoresGetOptions) Reset() {
+ *x = StoresGetOptions{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[4]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresGetOptions) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresGetOptions) ProtoMessage() {}
+
+func (x *StoresGetOptions) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[4]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresGetOptions.ProtoReflect.Descriptor instead.
+func (*StoresGetOptions) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{4}
+}
+
+func (x *StoresGetOptions) GetKeys() []*StoresKey {
+ if x != nil {
+ return x.Keys
+ }
+ return nil
+}
+
+type StoresGetResult struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
+ Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
+}
+
+func (x *StoresGetResult) Reset() {
+ *x = StoresGetResult{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[5]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresGetResult) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresGetResult) ProtoMessage() {}
+
+func (x *StoresGetResult) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[5]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresGetResult.ProtoReflect.Descriptor instead.
+func (*StoresGetResult) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{5}
+}
+
+func (x *StoresGetResult) GetKeys() []*StoresKey {
+ if x != nil {
+ return x.Keys
+ }
+ return nil
+}
+
+func (x *StoresGetResult) GetValues() []*StoresValue {
+ if x != nil {
+ return x.Values
+ }
+ return nil
+}
+
+type StoresFindOptions struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Key *StoresKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"`
+ TopK int32 `protobuf:"varint,2,opt,name=TopK,proto3" json:"TopK,omitempty"`
+}
+
+func (x *StoresFindOptions) Reset() {
+ *x = StoresFindOptions{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[6]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresFindOptions) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresFindOptions) ProtoMessage() {}
+
+func (x *StoresFindOptions) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[6]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresFindOptions.ProtoReflect.Descriptor instead.
+func (*StoresFindOptions) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{6}
+}
+
+func (x *StoresFindOptions) GetKey() *StoresKey {
+ if x != nil {
+ return x.Key
+ }
+ return nil
+}
+
+func (x *StoresFindOptions) GetTopK() int32 {
+ if x != nil {
+ return x.TopK
+ }
+ return 0
+}
+
+type StoresFindResult struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
+ Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
+ Similarities []float32 `protobuf:"fixed32,3,rep,packed,name=Similarities,proto3" json:"Similarities,omitempty"`
+}
+
+func (x *StoresFindResult) Reset() {
+ *x = StoresFindResult{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[7]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *StoresFindResult) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StoresFindResult) ProtoMessage() {}
+
+func (x *StoresFindResult) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[7]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use StoresFindResult.ProtoReflect.Descriptor instead.
+func (*StoresFindResult) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{7}
+}
+
+func (x *StoresFindResult) GetKeys() []*StoresKey {
+ if x != nil {
+ return x.Keys
+ }
+ return nil
+}
+
+func (x *StoresFindResult) GetValues() []*StoresValue {
+ if x != nil {
+ return x.Values
+ }
+ return nil
+}
+
+func (x *StoresFindResult) GetSimilarities() []float32 {
+ if x != nil {
+ return x.Similarities
+ }
+ return nil
}
type HealthMessage struct {
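
A minimal sketch (not part of the patch) of how the new Stores messages compose: a key is an embedding vector, a value is an opaque byte payload, and the Set/Find options pair them via the fields shown in the hunk above. The import path is an assumption; the field and getter names (Floats per the raw descriptor below, Bytes, Keys, Values, Key, TopK) come from the generated code in this patch.

package main

import (
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" // assumed import path for the generated package
)

func main() {
	// Store one entry: the key is an embedding vector, the value an opaque payload.
	set := &pb.StoresSetOptions{
		Keys:   []*pb.StoresKey{{Floats: []float32{0.1, 0.2, 0.3}}},
		Values: []*pb.StoresValue{{Bytes: []byte("chunk-1")}},
	}

	// Query the two nearest keys to a probe vector.
	find := &pb.StoresFindOptions{
		Key:  &pb.StoresKey{Floats: []float32{0.1, 0.2, 0.25}},
		TopK: 2,
	}

	fmt.Println(len(set.GetKeys()), find.GetTopK())
}

Keeping Keys and Values as parallel repeated fields lets a single RPC batch many entries, which is why StoresGetResult and StoresFindResult mirror the same shape (with Similarities added for Find).
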
@@ -81,7 +497,7 @@ type HealthMessage struct {
func (x *HealthMessage) Reset() {
*x = HealthMessage{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[0]
+ mi := &file_backend_proto_msgTypes[8]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -94,7 +510,7 @@ func (x *HealthMessage) String() string {
func (*HealthMessage) ProtoMessage() {}
func (x *HealthMessage) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[0]
+ mi := &file_backend_proto_msgTypes[8]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -107,7 +523,7 @@ func (x *HealthMessage) ProtoReflect() protoreflect.Message {
// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead.
func (*HealthMessage) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{0}
+ return file_backend_proto_rawDescGZIP(), []int{8}
}
// The request message containing the user's name.
@@ -162,7 +578,7 @@ type PredictOptions struct {
func (x *PredictOptions) Reset() {
*x = PredictOptions{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[1]
+ mi := &file_backend_proto_msgTypes[9]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -175,7 +591,7 @@ func (x *PredictOptions) String() string {
func (*PredictOptions) ProtoMessage() {}
func (x *PredictOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[1]
+ mi := &file_backend_proto_msgTypes[9]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -188,7 +604,7 @@ func (x *PredictOptions) ProtoReflect() protoreflect.Message {
// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead.
func (*PredictOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{1}
+ return file_backend_proto_rawDescGZIP(), []int{9}
}
func (x *PredictOptions) GetPrompt() string {
@@ -490,7 +906,7 @@ type Reply struct {
func (x *Reply) Reset() {
*x = Reply{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[2]
+ mi := &file_backend_proto_msgTypes[10]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -503,7 +919,7 @@ func (x *Reply) String() string {
func (*Reply) ProtoMessage() {}
func (x *Reply) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[2]
+ mi := &file_backend_proto_msgTypes[10]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -516,7 +932,7 @@ func (x *Reply) ProtoReflect() protoreflect.Message {
// Deprecated: Use Reply.ProtoReflect.Descriptor instead.
func (*Reply) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{2}
+ return file_backend_proto_rawDescGZIP(), []int{10}
}
func (x *Reply) GetMessage() []byte {
@@ -594,7 +1010,7 @@ type ModelOptions struct {
func (x *ModelOptions) Reset() {
*x = ModelOptions{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[3]
+ mi := &file_backend_proto_msgTypes[11]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -607,7 +1023,7 @@ func (x *ModelOptions) String() string {
func (*ModelOptions) ProtoMessage() {}
func (x *ModelOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[3]
+ mi := &file_backend_proto_msgTypes[11]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -620,7 +1036,7 @@ func (x *ModelOptions) ProtoReflect() protoreflect.Message {
// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead.
func (*ModelOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{3}
+ return file_backend_proto_rawDescGZIP(), []int{11}
}
func (x *ModelOptions) GetModel() string {
@@ -1013,7 +1429,7 @@ type Result struct {
func (x *Result) Reset() {
*x = Result{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[4]
+ mi := &file_backend_proto_msgTypes[12]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1026,7 +1442,7 @@ func (x *Result) String() string {
func (*Result) ProtoMessage() {}
func (x *Result) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[4]
+ mi := &file_backend_proto_msgTypes[12]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1039,7 +1455,7 @@ func (x *Result) ProtoReflect() protoreflect.Message {
// Deprecated: Use Result.ProtoReflect.Descriptor instead.
func (*Result) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{4}
+ return file_backend_proto_rawDescGZIP(), []int{12}
}
func (x *Result) GetMessage() string {
@@ -1067,7 +1483,7 @@ type EmbeddingResult struct {
func (x *EmbeddingResult) Reset() {
*x = EmbeddingResult{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[5]
+ mi := &file_backend_proto_msgTypes[13]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1080,7 +1496,7 @@ func (x *EmbeddingResult) String() string {
func (*EmbeddingResult) ProtoMessage() {}
func (x *EmbeddingResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[5]
+ mi := &file_backend_proto_msgTypes[13]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1093,7 +1509,7 @@ func (x *EmbeddingResult) ProtoReflect() protoreflect.Message {
// Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead.
func (*EmbeddingResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{5}
+ return file_backend_proto_rawDescGZIP(), []int{13}
}
func (x *EmbeddingResult) GetEmbeddings() []float32 {
@@ -1116,7 +1532,7 @@ type TranscriptRequest struct {
func (x *TranscriptRequest) Reset() {
*x = TranscriptRequest{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[6]
+ mi := &file_backend_proto_msgTypes[14]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1129,7 +1545,7 @@ func (x *TranscriptRequest) String() string {
func (*TranscriptRequest) ProtoMessage() {}
func (x *TranscriptRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[6]
+ mi := &file_backend_proto_msgTypes[14]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1142,7 +1558,7 @@ func (x *TranscriptRequest) ProtoReflect() protoreflect.Message {
// Deprecated: Use TranscriptRequest.ProtoReflect.Descriptor instead.
func (*TranscriptRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{6}
+ return file_backend_proto_rawDescGZIP(), []int{14}
}
func (x *TranscriptRequest) GetDst() string {
@@ -1178,7 +1594,7 @@ type TranscriptResult struct {
func (x *TranscriptResult) Reset() {
*x = TranscriptResult{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[7]
+ mi := &file_backend_proto_msgTypes[15]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1191,7 +1607,7 @@ func (x *TranscriptResult) String() string {
func (*TranscriptResult) ProtoMessage() {}
func (x *TranscriptResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[7]
+ mi := &file_backend_proto_msgTypes[15]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1204,7 +1620,7 @@ func (x *TranscriptResult) ProtoReflect() protoreflect.Message {
// Deprecated: Use TranscriptResult.ProtoReflect.Descriptor instead.
func (*TranscriptResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{7}
+ return file_backend_proto_rawDescGZIP(), []int{15}
}
func (x *TranscriptResult) GetSegments() []*TranscriptSegment {
@@ -1236,7 +1652,7 @@ type TranscriptSegment struct {
func (x *TranscriptSegment) Reset() {
*x = TranscriptSegment{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[8]
+ mi := &file_backend_proto_msgTypes[16]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1249,7 +1665,7 @@ func (x *TranscriptSegment) String() string {
func (*TranscriptSegment) ProtoMessage() {}
func (x *TranscriptSegment) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[8]
+ mi := &file_backend_proto_msgTypes[16]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1262,7 +1678,7 @@ func (x *TranscriptSegment) ProtoReflect() protoreflect.Message {
// Deprecated: Use TranscriptSegment.ProtoReflect.Descriptor instead.
func (*TranscriptSegment) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{8}
+ return file_backend_proto_rawDescGZIP(), []int{16}
}
func (x *TranscriptSegment) GetId() int32 {
@@ -1322,7 +1738,7 @@ type GenerateImageRequest struct {
func (x *GenerateImageRequest) Reset() {
*x = GenerateImageRequest{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[9]
+ mi := &file_backend_proto_msgTypes[17]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1335,7 +1751,7 @@ func (x *GenerateImageRequest) String() string {
func (*GenerateImageRequest) ProtoMessage() {}
func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[9]
+ mi := &file_backend_proto_msgTypes[17]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1348,7 +1764,7 @@ func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message {
// Deprecated: Use GenerateImageRequest.ProtoReflect.Descriptor instead.
func (*GenerateImageRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{9}
+ return file_backend_proto_rawDescGZIP(), []int{17}
}
func (x *GenerateImageRequest) GetHeight() int32 {
@@ -1442,7 +1858,7 @@ type TTSRequest struct {
func (x *TTSRequest) Reset() {
*x = TTSRequest{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[10]
+ mi := &file_backend_proto_msgTypes[18]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1455,7 +1871,7 @@ func (x *TTSRequest) String() string {
func (*TTSRequest) ProtoMessage() {}
func (x *TTSRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[10]
+ mi := &file_backend_proto_msgTypes[18]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1468,7 +1884,7 @@ func (x *TTSRequest) ProtoReflect() protoreflect.Message {
// Deprecated: Use TTSRequest.ProtoReflect.Descriptor instead.
func (*TTSRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{10}
+ return file_backend_proto_rawDescGZIP(), []int{18}
}
func (x *TTSRequest) GetText() string {
@@ -1511,7 +1927,7 @@ type TokenizationResponse struct {
func (x *TokenizationResponse) Reset() {
*x = TokenizationResponse{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[11]
+ mi := &file_backend_proto_msgTypes[19]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1524,7 +1940,7 @@ func (x *TokenizationResponse) String() string {
func (*TokenizationResponse) ProtoMessage() {}
func (x *TokenizationResponse) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[11]
+ mi := &file_backend_proto_msgTypes[19]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1537,7 +1953,7 @@ func (x *TokenizationResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use TokenizationResponse.ProtoReflect.Descriptor instead.
func (*TokenizationResponse) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{11}
+ return file_backend_proto_rawDescGZIP(), []int{19}
}
func (x *TokenizationResponse) GetLength() int32 {
@@ -1566,7 +1982,7 @@ type MemoryUsageData struct {
func (x *MemoryUsageData) Reset() {
*x = MemoryUsageData{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[12]
+ mi := &file_backend_proto_msgTypes[20]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1579,7 +1995,7 @@ func (x *MemoryUsageData) String() string {
func (*MemoryUsageData) ProtoMessage() {}
func (x *MemoryUsageData) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[12]
+ mi := &file_backend_proto_msgTypes[20]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1592,7 +2008,7 @@ func (x *MemoryUsageData) ProtoReflect() protoreflect.Message {
// Deprecated: Use MemoryUsageData.ProtoReflect.Descriptor instead.
func (*MemoryUsageData) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{12}
+ return file_backend_proto_rawDescGZIP(), []int{20}
}
func (x *MemoryUsageData) GetTotal() uint64 {
@@ -1621,7 +2037,7 @@ type StatusResponse struct {
func (x *StatusResponse) Reset() {
*x = StatusResponse{}
if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[13]
+ mi := &file_backend_proto_msgTypes[21]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -1634,7 +2050,7 @@ func (x *StatusResponse) String() string {
func (*StatusResponse) ProtoMessage() {}
func (x *StatusResponse) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[13]
+ mi := &file_backend_proto_msgTypes[21]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -1647,7 +2063,7 @@ func (x *StatusResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use StatusResponse.ProtoReflect.Descriptor instead.
func (*StatusResponse) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{13}
+ return file_backend_proto_rawDescGZIP(), []int{21}
}
func (x *StatusResponse) GetState() StatusResponse_State {
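
The index rewrites in the hunks above (msgTypes[0] to [8], [1] to [9], and so on, together with the matching []int{n} values in each Descriptor method) all follow from the eight Stores messages now occupying slots 0 through 7 ahead of the pre-existing messages. A minimal sketch, assuming the same generated package as above, that prints each message's position in backend.proto, i.e. the number its Descriptor() returns:

package main

import (
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" // assumed import path
)

func main() {
	// File_backend_proto is the file descriptor variable declared in the generated code.
	msgs := pb.File_backend_proto.Messages()
	for i := 0; i < msgs.Len(); i++ {
		fmt.Printf("%d\t%s\n", i, msgs.Get(i).Name())
	}
}
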
@@ -1668,322 +2084,377 @@ var File_backend_proto protoreflect.FileDescriptor
var file_backend_proto_rawDesc = []byte{
0x0a, 0x0d, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12,
- 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c,
- 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72,
- 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06,
- 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72,
- 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01,
- 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65,
- 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61,
- 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01,
- 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f,
- 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16,
- 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06,
- 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18,
- 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05,
- 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65,
- 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72,
- 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61,
- 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18,
- 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14,
- 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46,
- 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64,
- 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f,
- 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
- 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f,
- 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f,
- 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45,
- 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61,
- 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54,
- 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a,
- 0x12, 0x1a, 0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01,
- 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10,
- 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79,
- 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63,
- 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73,
- 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28,
- 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c,
- 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13,
- 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20,
- 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20,
- 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41,
- 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18,
- 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54,
- 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c,
- 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65,
- 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18,
- 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73,
- 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a,
- 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72,
- 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01,
- 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41,
- 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68,
- 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70,
- 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d,
- 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d,
- 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20,
+ 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x23, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72,
+ 0x65, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x18,
+ 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x22, 0x23, 0x0a,
+ 0x0b, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05,
+ 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x42, 0x79, 0x74,
+ 0x65, 0x73, 0x22, 0x68, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f,
+ 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01,
+ 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53,
+ 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c,
+ 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14,
+ 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56,
+ 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0x3d, 0x0a, 0x13,
+ 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69,
+ 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28,
+ 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72,
+ 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x3a, 0x0a, 0x10, 0x53,
+ 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12,
+ 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65,
+ 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x53, 0x74, 0x6f, 0x72, 0x65,
+ 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65,
+ 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65,
+ 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03,
+ 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
+ 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73,
+ 0x22, 0x4d, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70,
+ 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01,
+ 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
+ 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x54,
+ 0x6f, 0x70, 0x4b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x22,
+ 0x8c, 0x01, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65,
+ 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03,
+ 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
+ 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06,
+ 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c,
+ 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x53, 0x69,
+ 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02,
+ 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f,
+ 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22,
+ 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
+ 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01,
+ 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65,
+ 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18,
+ 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65,
+ 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
+ 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
+ 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06,
+ 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05,
+ 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74,
+ 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28,
+ 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70,
+ 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54,
+ 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65,
+ 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e,
+ 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20,
+ 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65,
+ 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44,
+ 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70,
+ 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53,
+ 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67,
+ 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49,
+ 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c,
+ 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20,
+ 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d,
+ 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61,
+ 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61,
+ 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50,
+ 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72,
+ 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28,
+ 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74,
+ 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63,
+ 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f,
+ 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f,
+ 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74,
+ 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73,
+ 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74,
+ 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72,
+ 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61,
+ 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65,
+ 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69,
+ 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67,
+ 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18,
+ 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04,
+ 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70,
+ 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41,
+ 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
+ 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d,
+ 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52,
+ 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18,
+ 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52,
+ 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e,
+ 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47,
+ 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69,
+ 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53,
+ 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01,
+ 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d,
+ 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61,
+ 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28,
+ 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65,
+ 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28,
+ 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65,
+ 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73,
+ 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e,
+ 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61,
+ 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
+ 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
+ 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52,
+ 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13,
+ 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63,
+ 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74,
+ 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26,
+ 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
+ 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65,
+ 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74,
+ 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16,
+ 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06,
+ 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12,
+ 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c,
+ 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f,
+ 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f,
+ 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
+ 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18,
+ 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69,
+ 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05,
+ 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68,
+ 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c,
+ 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28,
+ 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05,
+ 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f,
+ 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08,
+ 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f,
+ 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62,
+ 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18,
+ 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e,
+ 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01,
+ 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12,
+ 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55,
+ 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73,
+ 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65,
+ 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20,
0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b,
- 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12,
- 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f,
- 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68,
- 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f,
- 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05,
- 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x44, 0x65, 0x62,
- 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54,
- 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28, 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62,
- 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a,
- 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c,
- 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x25, 0x20, 0x01,
- 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65,
- 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c,
- 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65,
- 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69,
- 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20,
- 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f,
- 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26, 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61,
- 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
- 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67,
- 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73,
- 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73,
- 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73,
- 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74,
- 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f,
- 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04,
- 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64,
- 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d,
- 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36,
- 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18,
- 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04,
- 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70,
- 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20,
- 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18,
- 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65,
- 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d,
- 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41,
- 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a,
- 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07,
- 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d,
- 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72,
- 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e,
- 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65,
- 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61,
- 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61,
- 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c,
- 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68,
- 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65,
- 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71,
- 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71,
- 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70,
- 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d,
- 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a,
- 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47,
- 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c,
- 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06,
- 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65,
- 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f,
- 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74,
- 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e,
- 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
- 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46,
- 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01,
- 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
- 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65,
- 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65,
- 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65,
- 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52,
- 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12,
- 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55,
- 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d,
- 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18,
- 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50,
- 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49,
- 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75,
- 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43,
- 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08,
- 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08,
- 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74,
- 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f,
- 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65,
- 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b,
- 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61,
- 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61,
- 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65,
- 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61,
- 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c,
- 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61,
- 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18,
- 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51,
- 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
- 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22,
- 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69,
- 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55,
- 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02,
- 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69,
- 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52,
- 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65,
- 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72,
- 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45,
- 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63,
- 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61,
- 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65,
- 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65,
- 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b,
- 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24,
- 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18,
- 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61,
- 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e,
- 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61,
- 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c,
- 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01,
- 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74,
- 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77,
- 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61,
- 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75,
- 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07,
- 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73,
- 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64,
- 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62,
- 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65,
- 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61,
- 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10,
- 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74,
- 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07,
- 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74,
- 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63,
- 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65,
- 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70,
- 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e,
- 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63,
- 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69,
- 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73,
- 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72,
- 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03,
- 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e,
- 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22,
- 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67,
- 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67,
- 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74,
- 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03,
- 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74,
- 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12,
- 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65,
- 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70,
- 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73,
- 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e,
- 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72,
- 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62,
- 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65,
- 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
- 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
- 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12,
- 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65,
- 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18,
- 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f,
- 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65,
- 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e,
- 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67,
- 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68,
- 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05,
- 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d,
- 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05,
- 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74,
- 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18,
- 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e,
- 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09,
- 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65,
- 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b,
- 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a,
- 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61,
- 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74,
- 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74,
- 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e,
- 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12,
- 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32,
- 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79,
- 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72,
- 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e,
- 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a,
- 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59,
- 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xf4, 0x04, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73,
- 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52,
- 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63,
- 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64,
- 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09,
- 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73,
- 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c,
- 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74,
- 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50,
- 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30,
- 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17,
- 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74,
+ 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18,
+ 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72,
+ 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72,
+ 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
+ 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f,
+ 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f,
+ 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28,
+ 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65,
+ 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13,
+ 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73,
+ 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
+ 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c,
+ 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69,
+ 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01,
+ 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73,
+ 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55,
+ 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65,
+ 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52,
+ 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a,
+ 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
+ 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73,
+ 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69,
+ 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09,
+ 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24,
+ 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18,
+ 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72,
+ 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01,
+ 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53,
+ 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53,
+ 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18,
+ 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c,
+ 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d,
+ 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64,
+ 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21,
+ 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e,
+ 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01,
+ 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c,
+ 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08,
+ 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08,
+ 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61,
+ 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c,
+ 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f,
+ 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c,
+ 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75,
+ 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d,
+ 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d,
+ 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66,
+ 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50,
+ 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f,
+ 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61,
+ 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e,
+ 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d,
+ 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e,
+ 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72,
+ 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f,
+ 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18,
+ 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f,
+ 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63,
+ 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e,
+ 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77,
+ 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53,
+ 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d,
+ 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d,
+ 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d,
+ 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72,
+ 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e,
+ 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61,
+ 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46,
+ 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72,
+ 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61,
+ 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01,
+ 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74,
+ 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61,
+ 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65,
+ 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65,
+ 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61,
+ 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79,
+ 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c,
+ 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
+ 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
+ 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20,
+ 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f,
+ 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12,
+ 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20,
+ 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22,
+ 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71,
+ 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61,
+ 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61,
+ 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20,
+ 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10,
+ 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
+ 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03,
+ 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61,
+ 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08,
+ 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74,
+ 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11,
+ 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e,
+ 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69,
+ 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03,
+ 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03,
+ 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78,
+ 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a,
+ 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74,
+ 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61,
+ 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16,
+ 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06,
+ 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18,
+ 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04,
+ 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65,
+ 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
+ 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01,
+ 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69,
+ 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70,
+ 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72,
+ 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61,
+ 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73,
+ 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03,
+ 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a,
+ 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65,
+ 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65,
+ 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c,
+ 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c,
+ 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71,
+ 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01,
+ 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65,
+ 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10,
+ 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74,
+ 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52,
+ 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
+ 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16,
+ 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06,
+ 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
+ 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac,
+ 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61,
+ 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28,
+ 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61,
+ 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61,
+ 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67,
+ 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45,
+ 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a,
+ 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72,
+ 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03,
+ 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01,
+ 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01,
+ 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65,
+ 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32,
+ 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73,
+ 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05,
+ 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18,
+ 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
+ 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52,
+ 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65,
+ 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45,
+ 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a,
+ 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f,
+ 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xfb, 0x06, 0x0a,
+ 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c,
+ 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61,
+ 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63,
+ 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07,
+ 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73,
+ 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79,
+ 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12,
+ 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f,
+ 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
+ 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65,
+ 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63,
+ 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69,
+ 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
+ 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64,
+ 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50,
+ 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e,
+ 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e,
+ 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63,
+ 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61,
+ 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b,
+ 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12,
+ 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69,
+ 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61,
+ 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19,
+ 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72,
+ 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54,
+ 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53,
+ 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f,
+ 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70,
+ 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
+ 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70,
+ 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73,
+ 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74,
+ 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73,
+ 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74,
+ 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65,
+ 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61,
+ 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f,
+ 0x0a, 0x0c, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c,
+ 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44,
+ 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12,
+ 0x42, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74,
0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c,
- 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49,
- 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47,
- 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75,
- 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
- 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54,
- 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70,
- 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73,
- 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73,
- 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75,
- 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65,
- 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
- 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a,
- 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
- 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00,
- 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61,
- 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61,
- 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x5a, 0x0a,
- 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
- 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61,
- 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69,
- 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e,
- 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67,
- 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f,
- 0x33,
+ 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c,
+ 0x74, 0x22, 0x00, 0x12, 0x45, 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e,
+ 0x64, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72,
+ 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69,
+ 0x6e, 0x64, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f,
+ 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49,
+ 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75,
+ 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f,
+ 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63,
+ 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (
@@ -1999,55 +2470,80 @@ func file_backend_proto_rawDescGZIP() []byte {
}
var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 15)
+var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 23)
var file_backend_proto_goTypes = []interface{}{
(StatusResponse_State)(0), // 0: backend.StatusResponse.State
- (*HealthMessage)(nil), // 1: backend.HealthMessage
- (*PredictOptions)(nil), // 2: backend.PredictOptions
- (*Reply)(nil), // 3: backend.Reply
- (*ModelOptions)(nil), // 4: backend.ModelOptions
- (*Result)(nil), // 5: backend.Result
- (*EmbeddingResult)(nil), // 6: backend.EmbeddingResult
- (*TranscriptRequest)(nil), // 7: backend.TranscriptRequest
- (*TranscriptResult)(nil), // 8: backend.TranscriptResult
- (*TranscriptSegment)(nil), // 9: backend.TranscriptSegment
- (*GenerateImageRequest)(nil), // 10: backend.GenerateImageRequest
- (*TTSRequest)(nil), // 11: backend.TTSRequest
- (*TokenizationResponse)(nil), // 12: backend.TokenizationResponse
- (*MemoryUsageData)(nil), // 13: backend.MemoryUsageData
- (*StatusResponse)(nil), // 14: backend.StatusResponse
- nil, // 15: backend.MemoryUsageData.BreakdownEntry
+ (*StoresKey)(nil), // 1: backend.StoresKey
+ (*StoresValue)(nil), // 2: backend.StoresValue
+ (*StoresSetOptions)(nil), // 3: backend.StoresSetOptions
+ (*StoresDeleteOptions)(nil), // 4: backend.StoresDeleteOptions
+ (*StoresGetOptions)(nil), // 5: backend.StoresGetOptions
+ (*StoresGetResult)(nil), // 6: backend.StoresGetResult
+ (*StoresFindOptions)(nil), // 7: backend.StoresFindOptions
+ (*StoresFindResult)(nil), // 8: backend.StoresFindResult
+ (*HealthMessage)(nil), // 9: backend.HealthMessage
+ (*PredictOptions)(nil), // 10: backend.PredictOptions
+ (*Reply)(nil), // 11: backend.Reply
+ (*ModelOptions)(nil), // 12: backend.ModelOptions
+ (*Result)(nil), // 13: backend.Result
+ (*EmbeddingResult)(nil), // 14: backend.EmbeddingResult
+ (*TranscriptRequest)(nil), // 15: backend.TranscriptRequest
+ (*TranscriptResult)(nil), // 16: backend.TranscriptResult
+ (*TranscriptSegment)(nil), // 17: backend.TranscriptSegment
+ (*GenerateImageRequest)(nil), // 18: backend.GenerateImageRequest
+ (*TTSRequest)(nil), // 19: backend.TTSRequest
+ (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse
+ (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData
+ (*StatusResponse)(nil), // 22: backend.StatusResponse
+ nil, // 23: backend.MemoryUsageData.BreakdownEntry
}
var file_backend_proto_depIdxs = []int32{
- 9, // 0: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
- 15, // 1: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
- 0, // 2: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
- 13, // 3: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
- 1, // 4: backend.Backend.Health:input_type -> backend.HealthMessage
- 2, // 5: backend.Backend.Predict:input_type -> backend.PredictOptions
- 4, // 6: backend.Backend.LoadModel:input_type -> backend.ModelOptions
- 2, // 7: backend.Backend.PredictStream:input_type -> backend.PredictOptions
- 2, // 8: backend.Backend.Embedding:input_type -> backend.PredictOptions
- 10, // 9: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
- 7, // 10: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
- 11, // 11: backend.Backend.TTS:input_type -> backend.TTSRequest
- 2, // 12: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
- 1, // 13: backend.Backend.Status:input_type -> backend.HealthMessage
- 3, // 14: backend.Backend.Health:output_type -> backend.Reply
- 3, // 15: backend.Backend.Predict:output_type -> backend.Reply
- 5, // 16: backend.Backend.LoadModel:output_type -> backend.Result
- 3, // 17: backend.Backend.PredictStream:output_type -> backend.Reply
- 6, // 18: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
- 5, // 19: backend.Backend.GenerateImage:output_type -> backend.Result
- 8, // 20: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
- 5, // 21: backend.Backend.TTS:output_type -> backend.Result
- 12, // 22: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
- 14, // 23: backend.Backend.Status:output_type -> backend.StatusResponse
- 14, // [14:24] is the sub-list for method output_type
- 4, // [4:14] is the sub-list for method input_type
- 4, // [4:4] is the sub-list for extension type_name
- 4, // [4:4] is the sub-list for extension extendee
- 0, // [0:4] is the sub-list for field type_name
+ 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey
+ 2, // 1: backend.StoresSetOptions.Values:type_name -> backend.StoresValue
+ 1, // 2: backend.StoresDeleteOptions.Keys:type_name -> backend.StoresKey
+ 1, // 3: backend.StoresGetOptions.Keys:type_name -> backend.StoresKey
+ 1, // 4: backend.StoresGetResult.Keys:type_name -> backend.StoresKey
+ 2, // 5: backend.StoresGetResult.Values:type_name -> backend.StoresValue
+ 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey
+ 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey
+ 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue
+ 17, // 9: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
+ 23, // 10: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
+ 0, // 11: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
+ 21, // 12: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
+ 9, // 13: backend.Backend.Health:input_type -> backend.HealthMessage
+ 10, // 14: backend.Backend.Predict:input_type -> backend.PredictOptions
+ 12, // 15: backend.Backend.LoadModel:input_type -> backend.ModelOptions
+ 10, // 16: backend.Backend.PredictStream:input_type -> backend.PredictOptions
+ 10, // 17: backend.Backend.Embedding:input_type -> backend.PredictOptions
+ 18, // 18: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
+ 15, // 19: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
+ 19, // 20: backend.Backend.TTS:input_type -> backend.TTSRequest
+ 10, // 21: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
+ 9, // 22: backend.Backend.Status:input_type -> backend.HealthMessage
+ 3, // 23: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions
+ 4, // 24: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions
+ 5, // 25: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions
+ 7, // 26: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions
+ 11, // 27: backend.Backend.Health:output_type -> backend.Reply
+ 11, // 28: backend.Backend.Predict:output_type -> backend.Reply
+ 13, // 29: backend.Backend.LoadModel:output_type -> backend.Result
+ 11, // 30: backend.Backend.PredictStream:output_type -> backend.Reply
+ 14, // 31: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
+ 13, // 32: backend.Backend.GenerateImage:output_type -> backend.Result
+ 16, // 33: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
+ 13, // 34: backend.Backend.TTS:output_type -> backend.Result
+ 20, // 35: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
+ 22, // 36: backend.Backend.Status:output_type -> backend.StatusResponse
+ 13, // 37: backend.Backend.StoresSet:output_type -> backend.Result
+ 13, // 38: backend.Backend.StoresDelete:output_type -> backend.Result
+ 6, // 39: backend.Backend.StoresGet:output_type -> backend.StoresGetResult
+ 8, // 40: backend.Backend.StoresFind:output_type -> backend.StoresFindResult
+ 27, // [27:41] is the sub-list for method output_type
+ 13, // [13:27] is the sub-list for method input_type
+ 13, // [13:13] is the sub-list for extension type_name
+ 13, // [13:13] is the sub-list for extension extendee
+ 0, // [0:13] is the sub-list for field type_name
}
func init() { file_backend_proto_init() }
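
The depIdxs sub-list boundaries in the trailer above move from [0:4]/[4:14]/[14:24] to [0:13]/[13:27]/[27:41]: entries 0-8 are the nine new Stores field references, and StoresSet, StoresDelete, StoresGet and StoresFind each contribute one input and one output entry. A minimal sketch checking that arithmetic — the offsets are copied from the old and new generated comments, the snippet itself is not part of the patch:

    package main

    import "fmt"

    func main() {
        // Sub-list offsets from the removed and the added depIdxs comments.
        oldField, oldIn, oldOut := 4, 14, 24
        newField, newIn, newOut := 13, 27, 41

        fmt.Println("added field refs:", newField-oldField)                 // 9
        fmt.Println("added RPC inputs:", (newIn-newField)-(oldIn-oldField)) // 4
        fmt.Println("added RPC outputs:", (newOut-newIn)-(oldOut-oldIn))    // 4
    }
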
@@ -2057,7 +2553,7 @@ func file_backend_proto_init() {
}
if !protoimpl.UnsafeEnabled {
file_backend_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*HealthMessage); i {
+ switch v := v.(*StoresKey); i {
case 0:
return &v.state
case 1:
@@ -2069,7 +2565,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*PredictOptions); i {
+ switch v := v.(*StoresValue); i {
case 0:
return &v.state
case 1:
@@ -2081,7 +2577,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*Reply); i {
+ switch v := v.(*StoresSetOptions); i {
case 0:
return &v.state
case 1:
@@ -2093,7 +2589,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*ModelOptions); i {
+ switch v := v.(*StoresDeleteOptions); i {
case 0:
return &v.state
case 1:
@@ -2105,7 +2601,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*Result); i {
+ switch v := v.(*StoresGetOptions); i {
case 0:
return &v.state
case 1:
@@ -2117,7 +2613,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*EmbeddingResult); i {
+ switch v := v.(*StoresGetResult); i {
case 0:
return &v.state
case 1:
@@ -2129,7 +2625,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptRequest); i {
+ switch v := v.(*StoresFindOptions); i {
case 0:
return &v.state
case 1:
@@ -2141,7 +2637,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptResult); i {
+ switch v := v.(*StoresFindResult); i {
case 0:
return &v.state
case 1:
@@ -2153,7 +2649,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptSegment); i {
+ switch v := v.(*HealthMessage); i {
case 0:
return &v.state
case 1:
@@ -2165,7 +2661,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*GenerateImageRequest); i {
+ switch v := v.(*PredictOptions); i {
case 0:
return &v.state
case 1:
@@ -2177,7 +2673,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TTSRequest); i {
+ switch v := v.(*Reply); i {
case 0:
return &v.state
case 1:
@@ -2189,7 +2685,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TokenizationResponse); i {
+ switch v := v.(*ModelOptions); i {
case 0:
return &v.state
case 1:
@@ -2201,7 +2697,7 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*MemoryUsageData); i {
+ switch v := v.(*Result); i {
case 0:
return &v.state
case 1:
@@ -2213,6 +2709,102 @@ func file_backend_proto_init() {
}
}
file_backend_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*EmbeddingResult); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*TranscriptRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*TranscriptResult); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*TranscriptSegment); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*GenerateImageRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*TTSRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*TokenizationResponse); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*MemoryUsageData); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_backend_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*StatusResponse); i {
case 0:
return &v.state
@@ -2231,7 +2823,7 @@ func file_backend_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_backend_proto_rawDesc,
NumEnums: 1,
- NumMessages: 15,
+ NumMessages: 23,
NumExtensions: 0,
NumServices: 1,
},
diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go
index ef5187bc..0314cd4e 100644
--- a/pkg/grpc/proto/backend_grpc.pb.go
+++ b/pkg/grpc/proto/backend_grpc.pb.go
@@ -1,6 +1,6 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
-// - protoc-gen-go-grpc v1.2.0
+// - protoc-gen-go-grpc v1.3.0
// - protoc v4.23.4
// source: backend.proto
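
Both generated files move in lockstep here: protoc-gen-go-grpc v1.3.0 is the release that began emitting the per-method FullMethodName constants that the rest of this diff substitutes for string literals. Neither file is meant to be edited by hand; both are regenerated from backend.proto. A minimal sketch of the regeneration hook as a Go generate directive — the paths=source_relative flags are an assumption, not a quote of LocalAI's Makefile:

    //go:generate protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative backend.proto
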
@@ -18,6 +18,23 @@ import (
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
+const (
+ Backend_Health_FullMethodName = "/backend.Backend/Health"
+ Backend_Predict_FullMethodName = "/backend.Backend/Predict"
+ Backend_LoadModel_FullMethodName = "/backend.Backend/LoadModel"
+ Backend_PredictStream_FullMethodName = "/backend.Backend/PredictStream"
+ Backend_Embedding_FullMethodName = "/backend.Backend/Embedding"
+ Backend_GenerateImage_FullMethodName = "/backend.Backend/GenerateImage"
+ Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription"
+ Backend_TTS_FullMethodName = "/backend.Backend/TTS"
+ Backend_TokenizeString_FullMethodName = "/backend.Backend/TokenizeString"
+ Backend_Status_FullMethodName = "/backend.Backend/Status"
+ Backend_StoresSet_FullMethodName = "/backend.Backend/StoresSet"
+ Backend_StoresDelete_FullMethodName = "/backend.Backend/StoresDelete"
+ Backend_StoresGet_FullMethodName = "/backend.Backend/StoresGet"
+ Backend_StoresFind_FullMethodName = "/backend.Backend/StoresFind"
+)
+
// BackendClient is the client API for Backend service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
@@ -32,6 +49,10 @@ type BackendClient interface {
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
+ StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error)
+ StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error)
+ StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error)
+ StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error)
}
type backendClient struct {
@@ -44,7 +65,7 @@ func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
- err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -53,7 +74,7 @@ func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...g
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
- err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -62,7 +83,7 @@ func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ..
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -70,7 +91,7 @@ func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ..
}
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
- stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
+ stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...)
if err != nil {
return nil, err
}
@@ -103,7 +124,7 @@ func (x *backendPredictStreamClient) Recv() (*Reply, error) {
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
out := new(EmbeddingResult)
- err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -112,7 +133,7 @@ func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -121,7 +142,7 @@ func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequ
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
out := new(TranscriptResult)
- err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -130,7 +151,7 @@ func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRe
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -139,7 +160,7 @@ func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.Ca
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
out := new(TokenizationResponse)
- err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
@@ -148,7 +169,43 @@ func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions,
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
out := new(StatusResponse)
- err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
+ err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) {
+ out := new(Result)
+ err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) {
+ out := new(Result)
+ err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) {
+ out := new(StoresGetResult)
+ err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...)
+ if err != nil {
+ return nil, err
+ }
+ return out, nil
+}
+
+func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) {
+ out := new(StoresFindResult)
+ err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
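
With the four client methods in place, any dialed LocalAI backend doubles as a key/value store over gRPC. A minimal client sketch, assuming a plaintext local endpoint; the Keys/Values field names are confirmed by the depIdxs entries earlier in this patch, while the Floats and Bytes fields of StoresKey and StoresValue are assumptions (those messages are defined in a part of the patch not shown here):

    package main

    import (
        "context"
        "log"

        "google.golang.org/grpc"
        "google.golang.org/grpc/credentials/insecure"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    func main() {
        conn, err := grpc.Dial("127.0.0.1:50051",
            grpc.WithTransportCredentials(insecure.NewCredentials()))
        if err != nil {
            log.Fatal(err)
        }
        defer conn.Close()
        client := pb.NewBackendClient(conn)

        // Assumed message shapes: StoresKey{Floats}, StoresValue{Bytes}.
        key := &pb.StoresKey{Floats: []float32{0.1, 0.2, 0.3}}
        val := &pb.StoresValue{Bytes: []byte("hello")}

        ctx := context.Background()
        if _, err := client.StoresSet(ctx, &pb.StoresSetOptions{
            Keys:   []*pb.StoresKey{key},
            Values: []*pb.StoresValue{val},
        }); err != nil {
            log.Fatal(err)
        }

        got, err := client.StoresGet(ctx, &pb.StoresGetOptions{Keys: []*pb.StoresKey{key}})
        if err != nil {
            log.Fatal(err)
        }
        log.Printf("retrieved %d value(s)", len(got.Values))
    }
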
@@ -169,6 +226,10 @@ type BackendServer interface {
TTS(context.Context, *TTSRequest) (*Result, error)
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
Status(context.Context, *HealthMessage) (*StatusResponse, error)
+ StoresSet(context.Context, *StoresSetOptions) (*Result, error)
+ StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error)
+ StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error)
+ StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error)
mustEmbedUnimplementedBackendServer()
}
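
Server-side, BackendServer now requires the four Stores methods, and any backend that embeds UnimplementedBackendServer keeps compiling while answering codes.Unimplemented (the stubs in the next hunk). A minimal sketch of a backend that opts in with an in-memory map — the Floats field is the same assumption as above, everything else follows the signatures in this hunk:

    package main

    import (
        "context"
        "fmt"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    // memStore serves the new Stores RPCs; the embedded stub type still
    // answers every method it does not override.
    type memStore struct {
        pb.UnimplementedBackendServer
        data map[string]*pb.StoresValue
    }

    // fingerprint derives a map key from the (assumed) float vector.
    func fingerprint(k *pb.StoresKey) string { return fmt.Sprint(k.Floats) }

    func (m *memStore) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Result, error) {
        if len(in.Keys) != len(in.Values) {
            return &pb.Result{Message: "keys/values length mismatch"}, nil
        }
        for i, k := range in.Keys {
            m.data[fingerprint(k)] = in.Values[i]
        }
        return &pb.Result{Success: true}, nil
    }

    func (m *memStore) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) {
        out := &pb.StoresGetResult{}
        for _, k := range in.Keys {
            if v, ok := m.data[fingerprint(k)]; ok {
                out.Keys = append(out.Keys, k)
                out.Values = append(out.Values, v)
            }
        }
        return out, nil
    }

Such a server would be registered the usual way, e.g. pb.RegisterBackendServer(s, &memStore{data: map[string]*pb.StoresValue{}}).
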
@@ -206,6 +267,18 @@ func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOption
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
}
+func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented")
+}
+func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not implemented")
+}
+func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented")
+}
+func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) {
+ return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented")
+}
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
@@ -229,7 +302,7 @@ func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(inte
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/Health",
+ FullMethod: Backend_Health_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
@@ -247,7 +320,7 @@ func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(int
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/Predict",
+ FullMethod: Backend_Predict_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
@@ -265,7 +338,7 @@ func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(i
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/LoadModel",
+ FullMethod: Backend_LoadModel_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
@@ -304,7 +377,7 @@ func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(i
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/Embedding",
+ FullMethod: Backend_Embedding_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
@@ -322,7 +395,7 @@ func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec fu
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/GenerateImage",
+ FullMethod: Backend_GenerateImage_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
@@ -340,7 +413,7 @@ func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, d
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/AudioTranscription",
+ FullMethod: Backend_AudioTranscription_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
@@ -358,7 +431,7 @@ func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interfa
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/TTS",
+ FullMethod: Backend_TTS_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
@@ -376,7 +449,7 @@ func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec f
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/TokenizeString",
+ FullMethod: Backend_TokenizeString_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
@@ -394,7 +467,7 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(inte
}
info := &grpc.UnaryServerInfo{
Server: srv,
- FullMethod: "/backend.Backend/Status",
+ FullMethod: Backend_Status_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
@@ -402,6 +475,78 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(inte
return interceptor(ctx, in, info, handler)
}
+func _Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(StoresSetOptions)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(BackendServer).StoresSet(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: Backend_StoresSet_FullMethodName,
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(StoresDeleteOptions)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(BackendServer).StoresDelete(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: Backend_StoresDelete_FullMethodName,
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(StoresGetOptions)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(BackendServer).StoresGet(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: Backend_StoresGet_FullMethodName,
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+ in := new(StoresFindOptions)
+ if err := dec(in); err != nil {
+ return nil, err
+ }
+ if interceptor == nil {
+ return srv.(BackendServer).StoresFind(ctx, in)
+ }
+ info := &grpc.UnaryServerInfo{
+ Server: srv,
+ FullMethod: Backend_StoresFind_FullMethodName,
+ }
+ handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+ return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions))
+ }
+ return interceptor(ctx, in, info, handler)
+}
+
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
@@ -445,6 +590,22 @@ var Backend_ServiceDesc = grpc.ServiceDesc{
MethodName: "Status",
Handler: _Backend_Status_Handler,
},
+ {
+ MethodName: "StoresSet",
+ Handler: _Backend_StoresSet_Handler,
+ },
+ {
+ MethodName: "StoresDelete",
+ Handler: _Backend_StoresDelete_Handler,
+ },
+ {
+ MethodName: "StoresGet",
+ Handler: _Backend_StoresGet_Handler,
+ },
+ {
+ MethodName: "StoresFind",
+ Handler: _Backend_StoresFind_Handler,
+ },
},
Streams: []grpc.StreamDesc{
{
diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go
index 07d055d9..8116241f 100644
--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -167,6 +167,54 @@ func (s *server) Status(ctx context.Context, in *pb.HealthMessage) (*pb.StatusRe
return &res, nil
}
+func (s *server) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Result, error) {
+ if s.llm.Locking() {
+ s.llm.Lock()
+ defer s.llm.Unlock()
+ }
+ err := s.llm.StoresSet(in)
+ if err != nil {
+ return &pb.Result{Message: fmt.Sprintf("Error setting entry: %s", err.Error()), Success: false}, err
+ }
+ return &pb.Result{Message: "Set key", Success: true}, nil
+}
+
+func (s *server) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions) (*pb.Result, error) {
+ if s.llm.Locking() {
+ s.llm.Lock()
+ defer s.llm.Unlock()
+ }
+ err := s.llm.StoresDelete(in)
+ if err != nil {
+ return &pb.Result{Message: fmt.Sprintf("Error deleting entry: %s", err.Error()), Success: false}, err
+ }
+ return &pb.Result{Message: "Deleted key", Success: true}, nil
+}
+
+func (s *server) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) {
+ if s.llm.Locking() {
+ s.llm.Lock()
+ defer s.llm.Unlock()
+ }
+ res, err := s.llm.StoresGet(in)
+ if err != nil {
+ return nil, err
+ }
+ return &res, nil
+}
+
+func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.StoresFindResult, error) {
+ if s.llm.Locking() {
+ s.llm.Lock()
+ defer s.llm.Unlock()
+ }
+ res, err := s.llm.StoresFind(in)
+ if err != nil {
+ return nil, err
+ }
+ return &res, nil
+}
+
func StartServer(address string, model LLM) error {
lis, err := net.Listen("tcp", address)
if err != nil {
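Each of the new `Stores*` handlers opens with the same guard: when the backend reports `Locking()`, the call is serialized with `Lock`/`Unlock` around the store operation. A minimal sketch of that pattern, assuming only the three methods used above (the `withLock` helper and `mutexBackend` type are illustrative, not part of the patch):

```go
package example

import "sync"

// Locker mirrors the subset of the backend interface the handlers rely on.
type Locker interface {
	Locking() bool // reports whether calls must be serialized
	Lock()
	Unlock()
}

// withLock factors out the guard each Stores* handler repeats:
// take the lock only when the backend asks for it.
func withLock(l Locker, fn func() error) error {
	if l.Locking() {
		l.Lock()
		defer l.Unlock()
	}
	return fn()
}

// mutexBackend is a toy Locker backed by a sync.Mutex.
type mutexBackend struct{ mu sync.Mutex }

func (b *mutexBackend) Locking() bool { return true }
func (b *mutexBackend) Lock()         { b.mu.Lock() }
func (b *mutexBackend) Unlock()       { b.mu.Unlock() }
```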
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index a6a84fd7..85744f9a 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -17,6 +17,7 @@ import (
var Aliases map[string]string = map[string]string{
"go-llama": LLamaCPP,
"llama": LLamaCPP,
+ "embedded-store": LocalStoreBackend,
}
const (
@@ -34,6 +35,8 @@ const (
TinyDreamBackend = "tinydream"
PiperBackend = "piper"
LCHuggingFaceBackend = "langchain-huggingface"
+
+ LocalStoreBackend = "local-store"
)
var AutoLoadBackends []string = []string{
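The new `embedded-store` alias means the store backend can be requested under either name. A tiny sketch of the lookup this enables (the `resolveBackend` helper is illustrative; the real loader consults the `Aliases` map shown above):

```go
package example

// Mirrors the alias table above: "embedded-store" resolves to "local-store".
var aliases = map[string]string{
	"embedded-store": "local-store",
}

// resolveBackend returns the canonical backend name for a requested one,
// falling back to the name itself when no alias exists.
func resolveBackend(name string) string {
	if canonical, ok := aliases[name]; ok {
		return canonical
	}
	return name
}
```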
diff --git a/pkg/store/client.go b/pkg/store/client.go
new file mode 100644
index 00000000..8facfdcf
--- /dev/null
+++ b/pkg/store/client.go
@@ -0,0 +1,155 @@
+package store
+
+import (
+ "context"
+ "fmt"
+
+ grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+)
+
+// Wrapper for the gRPC client so that simple use cases are handled without verbosity.
+
+// SetCols sets multiple key-value pairs in the store
+// It's in columnar format so that keys[i] is associated with values[i]
+func SetCols(ctx context.Context, c grpc.Backend, keys [][]float32, values [][]byte) error {
+ protoKeys := make([]*proto.StoresKey, len(keys))
+ for i, k := range keys {
+ protoKeys[i] = &proto.StoresKey{
+ Floats: k,
+ }
+ }
+ protoValues := make([]*proto.StoresValue, len(values))
+ for i, v := range values {
+ protoValues[i] = &proto.StoresValue{
+ Bytes: v,
+ }
+ }
+ setOpts := &proto.StoresSetOptions{
+ Keys: protoKeys,
+ Values: protoValues,
+ }
+
+ res, err := c.StoresSet(ctx, setOpts)
+ if err != nil {
+ return err
+ }
+
+ if res.Success {
+ return nil
+ }
+
+ return fmt.Errorf("failed to set keys: %v", res.Message)
+}
+
+// SetSingle sets a single key-value pair in the store
+// Don't call this in a tight loop; use SetCols instead.
+func SetSingle(ctx context.Context, c grpc.Backend, key []float32, value []byte) error {
+ return SetCols(ctx, c, [][]float32{key}, [][]byte{value})
+}
+
+// DeleteCols deletes multiple key-value pairs from the store
+// It's in columnar format so that keys[i] is associated with values[i]
+func DeleteCols(ctx context.Context, c grpc.Backend, keys [][]float32) error {
+ protoKeys := make([]*proto.StoresKey, len(keys))
+ for i, k := range keys {
+ protoKeys[i] = &proto.StoresKey{
+ Floats: k,
+ }
+ }
+ deleteOpts := &proto.StoresDeleteOptions{
+ Keys: protoKeys,
+ }
+
+ res, err := c.StoresDelete(ctx, deleteOpts)
+ if err != nil {
+ return err
+ }
+
+ if res.Success {
+ return nil
+ }
+
+ return fmt.Errorf("failed to delete keys: %v", res.Message)
+}
+
+// DeleteSingle deletes a single key-value pair from the store
+// Don't call this in a tight loop; use DeleteCols instead.
+func DeleteSingle(ctx context.Context, c grpc.Backend, key []float32) error {
+ return DeleteCols(ctx, c, [][]float32{key})
+}
+
+// GetCols gets multiple key-value pairs from the store
+// It's in columnar format so that keys[i] is associated with values[i]
+// Be warned: the keys are sorted, so they may be returned in a different order than they were input.
+// There is no guarantee as to how the keys are sorted.
+func GetCols(ctx context.Context, c grpc.Backend, keys [][]float32) ([][]float32, [][]byte, error) {
+ protoKeys := make([]*proto.StoresKey, len(keys))
+ for i, k := range keys {
+ protoKeys[i] = &proto.StoresKey{
+ Floats: k,
+ }
+ }
+ getOpts := &proto.StoresGetOptions{
+ Keys: protoKeys,
+ }
+
+ res, err := c.StoresGet(ctx, getOpts)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ ks := make([][]float32, len(res.Keys))
+ for i, k := range res.Keys {
+ ks[i] = k.Floats
+ }
+ vs := make([][]byte, len(res.Values))
+ for i, v := range res.Values {
+ vs[i] = v.Bytes
+ }
+
+ return ks, vs, nil
+}
+
+// GetSingle gets a single key-value pair from the store
+// Don't call this in a tight loop; use GetCols instead.
+func GetSingle(ctx context.Context, c grpc.Backend, key []float32) ([]byte, error) {
+ _, values, err := GetCols(ctx, c, [][]float32{key})
+ if err != nil {
+ return nil, err
+ }
+
+ if len(values) > 0 {
+ return values[0], nil
+ }
+
+ return nil, fmt.Errorf("failed to get key")
+}
+
+// Find similar keys to the given key. Returns the keys, values, and similarities
+func Find(ctx context.Context, c grpc.Backend, key []float32, topk int) ([][]float32, [][]byte, []float32, error) {
+ findOpts := &proto.StoresFindOptions{
+ Key: &proto.StoresKey{
+ Floats: key,
+ },
+ TopK: int32(topk),
+ }
+
+ res, err := c.StoresFind(ctx, findOpts)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ ks := make([][]float32, len(res.Keys))
+ vs := make([][]byte, len(res.Values))
+
+ for i, k := range res.Keys {
+ ks[i] = k.Floats
+ }
+
+ for i, v := range res.Values {
+ vs[i] = v.Bytes
+ }
+
+ return ks, vs, res.Similarities, nil
+}
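Taken together, the wrapper turns a store round-trip into a few calls. A usage sketch against the functions above, assuming `sc` is a `grpc.Backend` already loaded with the `local-store` backend (as the integration tests below set up):

```go
package example

import (
	"context"
	"log"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
	"github.com/go-skynet/LocalAI/pkg/store"
)

// storeRoundTrip writes two vectors, reads one back, and queries the
// most similar stored key; sc must be backed by the local-store backend.
func storeRoundTrip(ctx context.Context, sc grpc.Backend) error {
	keys := [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}
	vals := [][]byte{[]byte("a"), []byte("b")}
	if err := store.SetCols(ctx, sc, keys, vals); err != nil {
		return err
	}

	v, err := store.GetSingle(ctx, sc, keys[0])
	if err != nil {
		return err
	}
	log.Printf("got value %s", v)

	// Top-1 nearest neighbour of a query vector.
	ks, vs, sims, err := store.Find(ctx, sc, []float32{0.1, 0.2, 0.3}, 1)
	if err != nil {
		return err
	}
	log.Printf("key=%v value=%s similarity=%f", ks[0], vs[0], sims[0])
	return nil
}
```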
diff --git a/tests/integration/integration_suite_test.go b/tests/integration/integration_suite_test.go
new file mode 100644
index 00000000..bbe8b5e0
--- /dev/null
+++ b/tests/integration/integration_suite_test.go
@@ -0,0 +1,17 @@
+package integration_test
+
+import (
+ "os"
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ "github.com/rs/zerolog"
+ "github.com/rs/zerolog/log"
+)
+
+func TestLocalAI(t *testing.T) {
+ log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+ RegisterFailHandler(Fail)
+ RunSpecs(t, "LocalAI test suite")
+}
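The suite file registers Ginkgo's fail handler and entry point once; individual specs then opt in via labels, as the stores tests below do with `Label("stores")`. A sketch of a minimal labeled spec hanging off this entry point (the spec itself is illustrative):

```go
package integration_test

import (
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Run just this label with: ginkgo --label-filter=stores ./tests/integration
var _ = Describe("stores smoke test", Label("stores"), func() {
	It("is wired into the suite", func() {
		Expect(1 + 1).To(Equal(2))
	})
})
```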
diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go
new file mode 100644
index 00000000..a4ad4f90
--- /dev/null
+++ b/tests/integration/stores_test.go
@@ -0,0 +1,228 @@
+package integration_test
+
+import (
+ "context"
+ "embed"
+ "math"
+ "os"
+ "path/filepath"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ "github.com/rs/zerolog"
+ "github.com/rs/zerolog/log"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/pkg/assets"
+ "github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/store"
+)
+
+//go:embed backend-assets/*
+var backendAssets embed.FS
+
+var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
+ Context("Embedded Store get,set and delete", func() {
+ var sl *model.ModelLoader
+ var sc grpc.Backend
+ var tmpdir string
+
+ BeforeEach(func() {
+ var err error
+
+ zerolog.SetGlobalLevel(zerolog.DebugLevel)
+
+ tmpdir, err = os.MkdirTemp("", "")
+ Expect(err).ToNot(HaveOccurred())
+ backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
+ err = os.Mkdir(backendAssetsDir, 0755)
+ Expect(err).ToNot(HaveOccurred())
+
+ err = assets.ExtractFiles(backendAssets, backendAssetsDir)
+ Expect(err).ToNot(HaveOccurred())
+
+ debug := true
+
+ bc := config.BackendConfig{
+ Name: "store test",
+ Debug: &debug,
+ Backend: model.LocalStoreBackend,
+ }
+
+ storeOpts := []model.Option{
+ model.WithBackendString(bc.Backend),
+ model.WithAssetDir(backendAssetsDir),
+ model.WithModel("test"),
+ }
+
+ sl = model.NewModelLoader("")
+ sc, err = sl.BackendLoader(storeOpts...)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(sc).ToNot(BeNil())
+ })
+
+ AfterEach(func() {
+ sl.StopAllGRPC()
+ err := os.RemoveAll(tmpdir)
+ Expect(err).ToNot(HaveOccurred())
+ })
+
+ It("should be able to set a key", func() {
+ err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
+ Expect(err).ToNot(HaveOccurred())
+ })
+
+ It("should be able to set keys", func() {
+ err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}, [][]byte{[]byte("test1"), []byte("test2")})
+ Expect(err).ToNot(HaveOccurred())
+
+ err = store.SetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.10, 0.11, 0.12}}, [][]byte{[]byte("test3"), []byte("test4")})
+ Expect(err).ToNot(HaveOccurred())
+ })
+
+ It("should be able to get a key", func() {
+ err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
+ Expect(err).ToNot(HaveOccurred())
+
+ val, err := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(val).To(Equal([]byte("test")))
+ })
+
+ It("should be able to get keys", func() {
+ //set 3 entries
+ err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
+ Expect(err).ToNot(HaveOccurred())
+
+ //get 3 entries
+ keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(keys).To(HaveLen(3))
+ Expect(vals).To(HaveLen(3))
+ for i, k := range keys {
+ v := vals[i]
+
+ if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 {
+ Expect(v).To(Equal([]byte("test1")))
+ } else if k[0] == 0.4 && k[1] == 0.5 && k[2] == 0.6 {
+ Expect(v).To(Equal([]byte("test2")))
+ } else {
+ Expect(k).To(Equal([]float32{0.7, 0.8, 0.9}))
+ Expect(v).To(Equal([]byte("test3")))
+ }
+ }
+
+ //get 2 entries
+ keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.1, 0.2, 0.3}})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(keys).To(HaveLen(2))
+ Expect(vals).To(HaveLen(2))
+ for i, k := range keys {
+ v := vals[i]
+
+ if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 {
+ Expect(v).To(Equal([]byte("test1")))
+ } else {
+ Expect(k).To(Equal([]float32{0.7, 0.8, 0.9}))
+ Expect(v).To(Equal([]byte("test3")))
+ }
+ }
+ })
+
+ It("should be able to delete a key", func() {
+ err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
+ Expect(err).ToNot(HaveOccurred())
+
+ err = store.DeleteSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
+ Expect(err).ToNot(HaveOccurred())
+
+ val, _ := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
+ Expect(val).To(BeNil())
+ })
+
+ It("should be able to delete keys", func() {
+ //set 3 entries
+ err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
+ Expect(err).ToNot(HaveOccurred())
+
+ //delete 2 entries
+ err = store.DeleteCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}})
+ Expect(err).ToNot(HaveOccurred())
+
+ //get 1 entry
+ keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.4, 0.5, 0.6}})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(keys).To(HaveLen(1))
+ Expect(vals).To(HaveLen(1))
+ Expect(keys[0]).To(Equal([]float32{0.4, 0.5, 0.6}))
+ Expect(vals[0]).To(Equal([]byte("test2")))
+
+ //get deleted entries
+ keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(keys).To(HaveLen(0))
+ Expect(vals).To(HaveLen(0))
+ })
+
+ It("should be able to find smilar keys", func() {
+ // set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
+ err := store.SetCols(context.Background(), sc, [][]float32{{0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
+ Expect(err).ToNot(HaveOccurred())
+
+ // find similar keys
+ keys, vals, sims, err := store.Find(context.Background(), sc, []float32{0.1, 0.3, 0.5}, 2)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(keys).To(HaveLen(2))
+ Expect(vals).To(HaveLen(2))
+ Expect(sims).To(HaveLen(2))
+
+ for i, k := range keys {
+ s := sims[i]
+ log.Debug().Float32("similarity", s).Msgf("key: %v", k)
+ }
+
+ Expect(keys[0]).To(Equal([]float32{0.5, 0.5, 0.5}))
+ Expect(vals[0]).To(Equal([]byte("test1")))
+ Expect(keys[1]).To(Equal([]float32{0.6, 0.6, -0.6}))
+ })
+
+ It("should be able to find similar normalized keys", func() {
+ // set 4 vectors: the query vector plus 3 vectors at varying angles to {0.5, 0.5, 0.5}
+ keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
+ vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
+ // normalize the keys
+ for i, k := range keys {
+ norm := float64(0)
+ for _, x := range k {
+ norm += float64(x * x)
+ }
+ norm = math.Sqrt(norm)
+ for j, x := range k {
+ keys[i][j] = x / float32(norm)
+ }
+ }
+
+ err := store.SetCols(context.Background(), sc, keys, vals)
+ Expect(err).ToNot(HaveOccurred())
+
+ // find similar keys
+ ks, vals, sims, err := store.Find(context.Background(), sc, keys[0], 3)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(ks).To(HaveLen(3))
+ Expect(vals).To(HaveLen(3))
+ Expect(sims).To(HaveLen(3))
+
+ for i, k := range ks {
+ s := sims[i]
+ log.Debug().Float32("similarity", s).Msgf("key: %v", k)
+ }
+
+ Expect(ks[0]).To(Equal(keys[0]))
+ Expect(vals[0]).To(Equal([]byte("test0")))
+ Expect(sims[0]).To(BeNumerically("~", 1, 0.0001))
+ Expect(ks[1]).To(Equal(keys[1]))
+ Expect(vals[1]).To(Equal([]byte("test1")))
+ })
+ })
+})
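The expectations above (a self-match scoring ~1, and results ordered by their angle to the query) are consistent with cosine similarity over the stored keys. For reference, a minimal sketch of that metric (assuming cosine similarity is indeed what the backend reports; the helper is illustrative):

```go
package example

import "math"

// cosine returns the cosine similarity of two equal-length vectors:
// dot(a, b) / (|a| * |b|). Parallel vectors score 1, orthogonal ones 0.
func cosine(a, b []float32) float32 {
	var dot, na, nb float64
	for i := range a {
		dot += float64(a[i]) * float64(b[i])
		na += float64(a[i]) * float64(a[i])
		nb += float64(b[i]) * float64(b[i])
	}
	return float32(dot / (math.Sqrt(na) * math.Sqrt(nb)))
}
```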
From a922119c41b40f3e3b7cfaa0189b25e94e5d9a2c Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 23 Mar 2024 09:23:28 +0100
Subject: [PATCH 0028/2750] :arrow_up: Update ggerganov/llama.cpp (#1881)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 94b5570c..2f50f362 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d0a71233fbf8ade8ef06ad8e6b81d1d7b254895f
+CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 1f501cc1eff7dabf7ac829697214dd726a538e70 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 23 Mar 2024 10:42:14 +0100
Subject: [PATCH 0029/2750] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index c58428f7..b101ee7f 100644
--- a/README.md
+++ b/README.md
@@ -43,14 +43,14 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-- All-in-one image: https://github.com/mudler/LocalAI/issues/1855
+- Vector store: https://github.com/mudler/LocalAI/pull/1795
+- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
- Tools API support: https://github.com/mudler/LocalAI/pull/1715
- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
- ROCm container images: https://github.com/mudler/LocalAI/pull/1595
- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
-- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
From 8495750cb8b147babb67d9111a7d9c692b69e3e1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 23 Mar 2024 15:22:26 +0100
Subject: [PATCH 0030/2750] Update release.yml
Signed-off-by: Ettore Di Giacinto
---
.github/release.yml | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/.github/release.yml b/.github/release.yml
index c86866c5..8c2c11f9 100644
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -12,13 +12,23 @@ changelog:
- title: "Bug fixes :bug:"
labels:
- bug
+ - regression
- title: Exciting New Features 🎉
labels:
- Semver-Minor
- enhancement
+ - ux
+ - roadmap
+ - title: 🧠 Models
+ labels:
+ - area/ai-model
+ - title: 📖 Documentation and examples
+ labels:
+ - kind/documentation
+ - examples
- title: 👒 Dependencies
labels:
- dependencies
- title: Other Changes
labels:
- - "*"
\ No newline at end of file
+ - "*"
From d9456f2a23e8d3a2250909c27c203e44084fc746 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 23 Mar 2024 15:54:14 +0100
Subject: [PATCH 0031/2750] ci(aio): publish hipblas and Intel GPU images
(#1883)
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/image.yml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 8e2bbbdd..484e505f 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -114,6 +114,7 @@ jobs:
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
+ aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
- build-type: 'hipblas'
@@ -132,6 +133,7 @@ jobs:
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
+ aio: "-aio-gpu-intel-f16"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -140,6 +142,7 @@ jobs:
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
+ aio: "-aio-gpu-intel-f32"
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
From 49cec7fd6162d0d997b4fee938c26c1d8a275847 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 23 Mar 2024 16:08:32 +0100
Subject: [PATCH 0032/2750] ci(aio): add latest tag images (#1884)
Tangentially also fixes #1868
---
.github/workflows/image.yml | 14 +++++++-------
.github/workflows/image_build.yml | 2 ++
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 484e505f..6e93cb9a 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -51,7 +51,7 @@ jobs:
base-image: "ubuntu:22.04"
- build-type: ''
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -81,7 +81,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -92,7 +92,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -110,7 +110,7 @@ jobs:
runs-on: 'arc-runner-set'
- build-type: 'hipblas'
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -127,7 +127,7 @@ jobs:
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
@@ -136,7 +136,7 @@ jobs:
aio: "-aio-gpu-intel-f16"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
@@ -218,7 +218,7 @@ jobs:
include:
- build-type: ''
platforms: 'linux/amd64'
- tag-latest: 'false'
+ tag-latest: 'auto'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 8f1386c6..659f85de 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -145,6 +145,7 @@ jobs:
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
+ latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
@@ -157,6 +158,7 @@ jobs:
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
+ latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
From bd25d8049c611b5ee5329cdcdc2eb0c6317f29bb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 23 Mar 2024 16:19:57 +0100
Subject: [PATCH 0033/2750] fix(watchdog): use ShutdownModel instead of
StopModel (#1882)
Fixes #1760
---
pkg/model/loader.go | 4 ++--
pkg/model/watchdog.go | 9 +++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index c2c9df0e..003d8327 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -155,10 +155,10 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
ml.mu.Lock()
defer ml.mu.Unlock()
- return ml.StopModel(modelName)
+ return ml.stopModel(modelName)
}
-func (ml *ModelLoader) StopModel(modelName string) error {
+func (ml *ModelLoader) stopModel(modelName string) error {
defer ml.deleteProcess(modelName)
if _, ok := ml.models[modelName]; !ok {
return fmt.Errorf("model %s not found", modelName)
diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go
index cf313180..c93cb99a 100644
--- a/pkg/model/watchdog.go
+++ b/pkg/model/watchdog.go
@@ -30,7 +30,7 @@ type WatchDog struct {
}
type ProcessManager interface {
- StopModel(modelName string) error
+ ShutdownModel(modelName string) error
}
func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool) *WatchDog {
@@ -112,9 +112,10 @@ func (wd *WatchDog) checkIdle() {
log.Warn().Msgf("[WatchDog] Address %s is idle for too long, killing it", address)
p, ok := wd.addressModelMap[address]
if ok {
- if err := wd.pm.StopModel(p); err != nil {
+ if err := wd.pm.ShutdownModel(p); err != nil {
log.Error().Msgf("[watchdog] Error shutting down model %s: %v", p, err)
}
+ log.Debug().Msgf("[WatchDog] model shut down: %s", address)
delete(wd.idleTime, address)
delete(wd.addressModelMap, address)
delete(wd.addressMap, address)
@@ -139,9 +140,10 @@ func (wd *WatchDog) checkBusy() {
model, ok := wd.addressModelMap[address]
if ok {
log.Warn().Msgf("[WatchDog] Model %s is busy for too long, killing it", model)
- if err := wd.pm.StopModel(model); err != nil {
+ if err := wd.pm.ShutdownModel(model); err != nil {
log.Error().Msgf("[watchdog] Error shutting down model %s: %v", model, err)
}
+ log.Debug().Msgf("[WatchDog] model shut down: %s", address)
delete(wd.timetable, address)
delete(wd.addressModelMap, address)
delete(wd.addressMap, address)
@@ -149,7 +151,6 @@ func (wd *WatchDog) checkBusy() {
log.Warn().Msgf("[WatchDog] Address %s unresolvable", address)
delete(wd.timetable, address)
}
-
}
}
}
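With the interface narrowed to `ShutdownModel`, anything driving the watchdog now goes through the lock-protected shutdown path. A sketch of a test double satisfying the revised `ProcessManager` contract (the stub is hypothetical, for illustration only):

```go
package example

// ProcessManager mirrors the revised interface in pkg/model/watchdog.go.
type ProcessManager interface {
	ShutdownModel(modelName string) error
}

// recordingPM is a hypothetical double that records which models the
// watchdog asked to shut down, e.g. for unit-testing checkIdle/checkBusy.
type recordingPM struct {
	shutdowns []string
}

func (p *recordingPM) ShutdownModel(modelName string) error {
	p.shutdowns = append(p.shutdowns, modelName)
	return nil
}
```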
From 0106c5818186bbb24298d53df7a88db1822d38f4 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 24 Mar 2024 14:54:01 +0100
Subject: [PATCH 0034/2750] :arrow_up: Update ggerganov/llama.cpp (#1885)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 2f50f362..adb3c813 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae
+CPPLLAMA_VERSION?=95562175f83a49755ff6fd3bad09409417c8e6f9
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 3e293f14659d7228216a20897e3d2d695909a86a Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 24 Mar 2024 22:12:18 +0100
Subject: [PATCH 0035/2750] :arrow_up: Update ggerganov/llama.cpp (#1889)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index adb3c813..c12ea8c0 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=95562175f83a49755ff6fd3bad09409417c8e6f9
+CPPLLAMA_VERSION?=a0e584defd8c16e7a51ab895f595df0448d710d0
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 6cf99527f8aa3c057bef68ce57809dcacfb15612 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 25 Mar 2024 03:01:30 +0100
Subject: [PATCH 0036/2750] docs(aio): Add All-in-One images docs (#1887)
* docs(aio): Add AIO images docs
* add image generation link to quickstart
* while reviewing I noticed this one link was missing, so quickly adding it.
Signed-off-by: Dave
Co-authored-by: Dave
---
Dockerfile | 3 +-
docs/content/docs/getting-started/build.md | 33 ++++---
.../docs/getting-started/quickstart.md | 98 ++++++++++++++++---
3 files changed, 107 insertions(+), 27 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index b083690e..8725e76d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -239,6 +239,7 @@ RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
-
+
+VOLUME /build/models
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index 238bdbec..8ceaf1f5 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -15,19 +15,7 @@ LocalAI's extensible architecture allows you to add your own backends, which can
In some cases you might want to re-build LocalAI from source (for instance to leverage Apple Silicon acceleration), or to build a custom container image with your own backends. This section contains instructions on how to build LocalAI from source.
-#### Container image
-Requirements:
-
-- Docker or podman, or a container engine
-
-In order to build the `LocalAI` container image locally you can use `docker`, for example:
-
-```
-# build the image
-docker build -t localai .
-docker run localai
-```
#### Build LocalAI locally
@@ -111,6 +99,27 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS
{{% /alert %}}
+#### Container image
+
+Requirements:
+
+- Docker or podman, or a container engine
+
+In order to build the `LocalAI` container image locally, you can use `docker`, for example:
+
+```
+# build the image
+docker build -t localai .
+docker run localai
+```
+
+There are some build arguments that can be used to customize the build:
+
+| Variable | Default | Description |
+| ---------------------| ------- | ----------- |
+| `IMAGE_TYPE` | `extras` | Build type. Available: `core`, `extras` |
+
+
### Example: Build on mac
Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`.
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index e786d6d8..b5fd65d3 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -10,17 +10,8 @@ icon = "rocket_launch"
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run [LLMs]({{%relref "docs/features/text-generation" %}}), generate images, audio (and not only) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures.
-## Installation Methods
-
LocalAI is available as a container image and binary, compatible with various container engines like Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai). Binaries can be downloaded from [GitHub](https://github.com/mudler/LocalAI/releases).
-
-{{% alert icon="💡" %}}
-
-**Hardware Requirements:** The hardware requirements for LocalAI vary based on the model size and quantization method used. For performance benchmarks with different backends, such as `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). The `rwkv` backend is noted for its lower resource consumption.
-
-{{% /alert %}}
-
## Prerequisites
Before you begin, ensure you have a container engine installed if you are not using the binaries. Suitable options include Docker or Podman. For installation instructions, refer to the following guides:
@@ -29,14 +20,80 @@ Before you begin, ensure you have a container engine installed if you are not us
- [Install Podman (Linux)](https://podman.io/getting-started/installation)
- [Install Docker engine (Servers)](https://docs.docker.com/engine/install/#get-started)
+{{% alert icon="💡" %}}
+
+**Hardware Requirements:** The hardware requirements for LocalAI vary based on the model size and quantization method used. For performance benchmarks with different backends, such as `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). The `rwkv` backend is noted for its lower resource consumption.
+
+{{% /alert %}}
+
+## Running LocalAI with All-in-One (AIO) Images
+
+LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set.
+
+These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration.
+
+It is suggested to use the AIO images if you don't want to configure the models to run on LocalAI. If you want to run specific models, you can use the [manual method]({{%relref "docs/getting-started/manual" %}}).
+
+The AIO images come pre-configured with the following features:
+- Text to Speech (TTS)
+- Speech to Text
+- Function calling
+- Large Language Models (LLM) for text generation
+- Image generation
+- Embedding server
+
+
+Start the image with Docker:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti localai/localai:{{< version >}}-aio-cpu
+# For Nvidia GPUs:
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-12
+```
+
+
+Or with a docker-compose file:
+
+```yaml
+version: "3.9"
+services:
+ api:
+ image: localai/localai:{{< version >}}-aio-cpu
+ # For Nvidia GPUs, uncomment one of the following (cuda11 or cuda12):
+ # image: localai/localai:{{< version >}}-aio-gpu-cuda-11
+ # image: localai/localai:{{< version >}}-aio-gpu-cuda-12
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
+ interval: 1m
+ timeout: 120m
+ retries: 120
+ ports:
+ - 8080:8080
+ environment:
+ - DEBUG=true
+ # ...
+ volumes:
+ - ./models:/build/models:cached
+ # uncomment the following piece if running with Nvidia GPUs
+ # deploy:
+ # resources:
+ # reservations:
+ # devices:
+ # - driver: nvidia
+ # count: 1
+ # capabilities: [gpu]
+```
## Running Models
> _Do you already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
-LocalAI allows one-click runs with popular models. It downloads the model and starts the API with the model loaded.
+To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Hugging Face, and configure them automatically.
-There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
+To do that, you can point LocalAI to a URL for a YAML configuration file; however, LocalAI also has some popular model configurations embedded in the binary. Below you can find a list of the model configurations that LocalAI has pre-built; see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) for how to configure models from URLs.
+
+There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Image Generation]({{%relref "docs/features/image-generation" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
{{% alert icon="💡" %}}
@@ -51,7 +108,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
-| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
@@ -68,7 +128,9 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
{{% /tab %}}
+
{{% tab tabName="GPU (CUDA 11)" %}}
@@ -77,7 +139,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
-| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core llava``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
@@ -94,6 +159,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
{{% /tab %}}
@@ -104,7 +170,10 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
-| 🌋 [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core llava``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
@@ -121,6 +190,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
{{% /tab %}}
{{< /tabs >}}
From 5e1238252458a548cba197f7c1e88fac44d6f3e7 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Mon, 25 Mar 2024 00:32:40 -0700
Subject: [PATCH 0037/2750] NVIDIA GPU detection support for WSL2 environments
(#1891)
This change makes the assumption that "Microsoft Corporation Device 008e"
is an NVIDIA CUDA device. If this is not the case, please update the
hardware detection script here.
Signed-off-by: Enrico Ros
Co-authored-by: Dave
---
aio/entrypoint.sh | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index b2f64f63..aeb5e4de 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -33,6 +33,17 @@ function detect_gpu() {
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
+ elif lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
+ # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
+ # Make sure the container was run with `--gpus all` as the only required parameter
+ echo "NVIDIA GPU detected via WSL2"
+ # nvidia-smi should be installed in the container
+ if nvidia-smi; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=nvidia
+ else
+ echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
+ fi
fi
;;
Darwin)
@@ -95,4 +106,4 @@ check_vars
echo "Starting LocalAI with the following models: $MODELS"
-/build/entrypoint.sh "$@"
\ No newline at end of file
+/build/entrypoint.sh "$@"
From 08c7b172980d707324ee7545e6f8b5be8dbddf4b Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Mon, 25 Mar 2024 10:36:18 -0700
Subject: [PATCH 0038/2750] Fix NVIDIA VRAM detection on WSL2 environments
(#1894)
* NVIDIA VRAM detection on WSL2 environments
More robust single NVIDIA GPU memory detection, following the
improved NVIDIA WSL2 detection patch yesterday #1891.
Tested and working on WSL2, Linux.
Signed-off-by: Enrico Ros
* Update aio/entrypoint.sh
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Enrico Ros
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
aio/entrypoint.sh | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index aeb5e4de..795cb86a 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -57,29 +57,33 @@ function detect_gpu() {
}
function detect_gpu_size() {
- if [ "$GPU_ACCELERATION" = true ]; then
- GPU_SIZE=gpu-8g
- fi
-
# Attempting to find GPU memory size for NVIDIA GPUs
- if echo "$gpu_model" | grep -iq nvidia; then
+ if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
echo "NVIDIA GPU detected. Attempting to find memory size..."
- nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
+ # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
+ # If handling multiple GPUs is required in the future, this is the place to do it
+ nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
if [ ! -z "$nvidia_sm" ]; then
- echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
+ echo "Total GPU Memory: $nvidia_sm MiB"
+ # if bigger than 8GB, use 16GB
+ #if [ "$nvidia_sm" -gt 8192 ]; then
+ # GPU_SIZE=gpu-16g
+ #else
+ GPU_SIZE=gpu-8g
+ #fi
else
- echo "Unable to determine NVIDIA GPU memory size."
+ echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
+ GPU_SIZE=gpu-8g
fi
- # if bigger than 8GB, use 16GB
- #if [ "$nvidia_sm" -gt 8192 ]; then
- # GPU_SIZE=gpu-16g
- #fi
- else
- echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
- fi
+
+ # Default to a generic GPU size until we implement GPU size detection for non-NVIDIA GPUs
+ elif [ "$GPU_ACCELERATION" = true ]; then
+ echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
+ GPU_SIZE=gpu-8g
# default to cpu if GPU_SIZE is not set
- if [ -z "$GPU_SIZE" ]; then
+ else
+ echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
GPU_SIZE=cpu
fi
}
From c9adc5680c1637efb60662e9c5d71c777c59a046 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 25 Mar 2024 22:04:32 +0100
Subject: [PATCH 0039/2750] fix(aio): make image-gen for GPU functional, update
docs (#1895)
* readme: update quickstart
* aio(gpu): fix dreamshaper
* tests(aio): allow to run tests also against an endpoint
* docs: split content
* tests: less verbosity
---------
Co-authored-by: Dave
---
README.md | 31 +--
aio/gpu-8g/image-gen.yaml | 6 +-
.../docs/getting-started/quickstart.md | 192 ++----------------
.../docs/getting-started/run-other-models.md | 126 ++++++++++++
docs/content/docs/overview.md | 48 ++++-
docs/content/docs/reference/aio-images.md | 39 ++++
.../docs/reference/container-images.md | 103 ++++++++++
tests/e2e-aio/e2e_suite_test.go | 67 +++---
tests/e2e-aio/e2e_test.go | 2 +-
9 files changed, 380 insertions(+), 234 deletions(-)
create mode 100644 docs/content/docs/getting-started/run-other-models.md
create mode 100644 docs/content/docs/reference/aio-images.md
create mode 100644 docs/content/docs/reference/container-images.md
diff --git a/README.md b/README.md
index b101ee7f..7ba96ad5 100644
--- a/README.md
+++ b/README.md
@@ -20,14 +20,14 @@
-[ ](https://hub.docker.com/r/localai/localai)
-[ ](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
-
-> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
+
+
+
+
+
+
+
+
@@ -36,8 +36,11 @@
+
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
+[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
+
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU.
## 🔥🔥 Hot topics / Roadmap
@@ -67,10 +70,14 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
-For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:
+For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.
-```
-docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2
+For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO (All-in-One) image using `docker`:
+
+```bash
+docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
+# or, if you have an Nvidia GPU:
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-cuda12
```
## 🚀 [Features](https://localai.io/features/)
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
index 9868572f..53994ebb 100644
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -1,6 +1,6 @@
name: stablediffusion
parameters:
- model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+ model: DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
@@ -11,6 +11,10 @@ diffusers:
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
+download_files:
+- filename: DreamShaper_8_pruned.safetensors
+ uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index b5fd65d3..33ec4cfa 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -28,6 +28,8 @@ Before you begin, ensure you have a container engine installed if you are not us
## Running LocalAI with All-in-One (AIO) Images
+> _Already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_.
+
 LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set.
 These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration.
@@ -46,10 +48,10 @@ The AIO Images comes pre-configured with the following features:
Start the image with Docker:
```bash
-docker run -p 8080:8080 --name local-ai -ti localai/localai:{{< version >}}-aio-cpu
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
# For Nvidia GPUs:
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-11
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:{{< version >}}-aio-gpu-cuda-12
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12
```
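+
+With the container up, any OpenAI-compatible client can talk to it. A minimal `curl` sketch (the `gpt-4` model name is an assumption here: use whatever name the AIO image pre-configures, as listed by `/v1/models`):
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello!"}]}'
+```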
@@ -85,185 +87,15 @@ services:
# capabilities: [gpu]
```
-## Running Models
-
-> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
-
-To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
-
-To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.
-
-There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Image Generation]({{%relref "docs/features/image-generation" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
-
-{{% alert icon="💡" %}}
-
-To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
-{{% /alert %}}
-
-{{< tabs tabTotal="3" >}}
-{{% tab tabName="CPU-only" %}}
-
-> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-{{% tab tabName="GPU (CUDA 11)" %}}
-
-
-> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-
-{{% tab tabName="GPU (CUDA 12)" %}}
-
-> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
-
-| Model | Category | Docker command |
-| --- | --- | --- |
-| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
-| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
-| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
-| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
-| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
-| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
-| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
-| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
-| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
-| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
-| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
-| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
-| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
-| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
-| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
-| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
-| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` |
-| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
-| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
-| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
-| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
-{{% /tab %}}
-
-{{< /tabs >}}
-
-{{% alert icon="💡" %}}
-**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured:
-
-```bash
-docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
-```
-
-{{% /alert %}}
-
-## Container images
-
-LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).
-
-For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}).
-
-{{% alert icon="💡" %}}
-
-**Available Images Types**:
-
-- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
-- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
-- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
-
-{{% /alert %}}
-
-{{< tabs tabTotal="3" >}}
-{{% tab tabName="Vanilla / CPU Images" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-----------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` |
-| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg` |
-| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` |
-
-{{% /tab %}}
-
-{{% tab tabName="GPU Images CUDA 11" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
-| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg` |
-| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` |
-
-{{% /tab %}}
-
-{{% tab tabName="GPU Images CUDA 12" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
-| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg` |
-| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` |
-
-{{% /tab %}}
-
-{{< /tabs >}}
+For a list of all the available container images, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about the All-in-One images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}).
## What's next?
Explore further resources and community contributions:
-- [Community How to's](https://io.midori-ai.xyz/howtos/)
-- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
-
-[](https://github.com/mudler/LocalAI/tree/master/examples#examples)
+- [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}})
+- [Run models manually]({{%relref "docs/getting-started/manual" %}})
+- [Run other models]({{%relref "docs/getting-started/run-other-models" %}})
+- [Container images]({{%relref "docs/reference/container-images" %}})
+- [All-in-one Images]({{%relref "docs/reference/aio-images" %}})
+- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
\ No newline at end of file
diff --git a/docs/content/docs/getting-started/run-other-models.md b/docs/content/docs/getting-started/run-other-models.md
new file mode 100644
index 00000000..4420550d
--- /dev/null
+++ b/docs/content/docs/getting-started/run-other-models.md
@@ -0,0 +1,126 @@
++++
+disableToc = false
+title = "Run other Models"
+weight = 3
+icon = "rocket_launch"
+
++++
+
+## Running other models
+
+> _Already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
+
+To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface, and configure them automatically.
+
+To do that, you can point LocalAI to the URL of a YAML configuration file. LocalAI also has a number of popular model configurations embedded in the binary. Below you can find the list of model configurations that LocalAI has pre-built; see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) for how to configure models from URLs.
+
+There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}), [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}), depending on the backend being used and the model architecture.
+
+{{% alert icon="💡" %}}
+
+To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations); the configurations for the models below are available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
+{{% /alert %}}
+
+{{< tabs tabTotal="3" >}}
+{{% tab tabName="CPU-only" %}}
+
+> 💡 Don't need GPU acceleration? Use the CPU images, which are lighter and do not have Nvidia dependencies.
+
+| Model | Category | Docker command |
+| --- | --- | --- |
+| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
+| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
+| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
+| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
+| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
+| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
+| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
+| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
+| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
+| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
+| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
+| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
+| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
+| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
+| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
+| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
+| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
+{{% /tab %}}
+
+{{% tab tabName="GPU (CUDA 11)" %}}
+
+
+> To check which CUDA version is available on your system, run `nvidia-smi` or `nvcc --version`; see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
+
+| Model | Category | Docker command |
+| --- | --- | --- |
+| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
+| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
+| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
+| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
+| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
+| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
+| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
+| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
+| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
+| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
+| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
+| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
+| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` |
+| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
+| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
+| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
+| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
+{{% /tab %}}
+
+
+{{% tab tabName="GPU (CUDA 12)" %}}
+
+> To check which CUDA version is available on your system, run `nvidia-smi` or `nvcc --version`; see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
+
+| Model | Category | Docker command |
+| --- | --- | --- |
+| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
+| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
+| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
+| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
+| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
+| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
+| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
+| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
+| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
+| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
+| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
+| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
+| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
+| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
+| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
+| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
+| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` |
+| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
+| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
+| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
+| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
+{{% /tab %}}
+
+{{< /tabs >}}
+
+{{% alert icon="💡" %}}
+**Tip** You can specify multiple models to start an instance with all of them loaded, for example to have both llava and phi-2 configured:
+
+```bash
+docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
+```
+
+{{% /alert %}}
\ No newline at end of file
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index f78a9be0..40ec9e4f 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -31,14 +31,14 @@ icon = "info"
-[ ](https://hub.docker.com/r/localai/localai)
-[ ](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
-
-> 💡 Get help - [❓FAQ](https://localai.io/faq/) [❓How tos](https://io.midori-ai.xyz/howtos/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
+
@@ -47,6 +47,35 @@ icon = "info"
+
+
+
+> 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
+>
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+
+
+
+
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and not only that) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures. It does not require a GPU. It is maintained by [mudler](https://github.com/mudler).
+
+
+## Start LocalAI
+
+Start the image with Docker to get a functional clone of OpenAI 🚀:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+# Do you have an Nvidia GPU? Use one of these instead:
+# CUDA 11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11
+# CUDA 12
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12
+```
+
+See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and ways you can run LocalAI!
+
+## What is LocalAI?
In a nutshell:
@@ -61,8 +90,7 @@ LocalAI is focused on making the AI accessible to anyone. Any contribution, feed
Note that this started just as a fun weekend project by [mudler](https://github.com/mudler) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
-
-## 🚀 Features
+### 🚀 Features
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
new file mode 100644
index 00000000..9c569fb5
--- /dev/null
+++ b/docs/content/docs/reference/aio-images.md
@@ -0,0 +1,39 @@
+
++++
+disableToc = false
+title = "All-In-One images"
+weight = 26
++++
+
+All-In-One images come pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. The model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
+
+
+| Description | Quay | Docker Hub |
+| --- | --- |-----------------------------------------------|
+| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
+| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
+| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
+| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
+| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
+| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
+| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |
+
+## Available environment variables
+
+The AIO images inherit the same environment variables as the base images and the environment of LocalAI (which you can inspect by calling `--help`). In addition, they support the following environment variables available only in the container image:
+
+| Variable | Default | Description |
+| ---------------------| ------- | ----------- |
+| `SIZE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
+| `MODELS` | Auto-detected | A list of model YAML configuration file URIs/URLs (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
+
+
+## Example
+
+Start the image with Docker:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+```
+
+LocalAI will automatically download all the required models, and will be available at [localhost:8080](http://localhost:8080/v1/models).
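+
+A hypothetical invocation overriding the variables above (the YAML URL is a placeholder, not a real configuration file):
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti \
+  -e SIZE=cpu \
+  -e MODELS="https://example.com/my-model.yaml" \
+  localai/localai:latest-aio-cpu
+
+# Once the downloads finish, verify the models are listed:
+curl http://localhost:8080/v1/models
+```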
diff --git a/docs/content/docs/reference/container-images.md b/docs/content/docs/reference/container-images.md
new file mode 100644
index 00000000..6531dd97
--- /dev/null
+++ b/docs/content/docs/reference/container-images.md
@@ -0,0 +1,103 @@
+
++++
+disableToc = false
+title = "Available Container images"
+weight = 25
++++
+
+LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).
+
+> _For All-in-One images with a pre-configured set of models and backends, see the [AIO Images]({{%relref "docs/reference/aio-images" %}})._
+
+For GPU acceleration support on Nvidia video graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have AMD or Apple Silicon hardware, see the [build section]({{%relref "docs/getting-started/build" %}}).
+
+{{% alert icon="💡" %}}
+
+**Available Images Types**:
+
+- Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use only the `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
+- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated default configuration.
+- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is required to use LocalAI's `audio-to-text` features.
+- If you are using old or outdated CPUs without a GPU, you might need to set the `REBUILD` environment variable to `true`, along with options to disable the flags your CPU does not support; note, however, that inference will be slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
+
+{{% /alert %}}
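+
+As a sketch of the rebuild scenario above (which CMake flags to disable depends on your CPU; the flags shown are illustrative assumptions, not a definitive list):
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti \
+  -e REBUILD=true \
+  -e CMAKE_ARGS="-DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" \
+  localai/localai:latest
+```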
+
+{{< tabs tabTotal="6" >}}
+{{% tab tabName="Vanilla / CPU Images" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-----------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{% tab tabName="GPU Images CUDA 11" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{% tab tabName="GPU Images CUDA 12" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{% tab tabName="Intel GPU (sycl f16)" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f16` | `localai/localai:latest-sycl-f16` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{% tab tabName="Intel GPU (sycl f32)" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f32` | `localai/localai:latest-sycl-f32` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{% tab tabName="AMD GPU" %}}
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-hipblas` | `localai/localai:latest-hipblas` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas` |
+| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg` | `localai/localai:{{< version >}}-hipblas-ffmpeg` |
+| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg-core` | `localai/localai:{{< version >}}-hipblas-ffmpeg-core` |
+
+{{% /tab %}}
+
+{{< /tabs >}}
+
+## See Also
+
+- [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}})
+- [AIO Images]({{%relref "docs/reference/aio-images" %}})
\ No newline at end of file
diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go
index 00fc6d2a..fa61c408 100644
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -22,6 +22,7 @@ var containerImage = os.Getenv("LOCALAI_IMAGE")
var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
var apiPort = os.Getenv("LOCALAI_API_PORT")
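+// LOCALAI_API_ENDPOINT, when set, points the suite at an already-running
+// LocalAI server instead of starting a disposable container via dockertest.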
+var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT")
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
@@ -30,16 +31,45 @@ func TestLocalAI(t *testing.T) {
var _ = BeforeSuite(func() {
- if containerImage == "" {
- Fail("LOCALAI_IMAGE is not set")
- }
- if containerImageTag == "" {
- Fail("LOCALAI_IMAGE_TAG is not set")
- }
if apiPort == "" {
apiPort = "8080"
}
+ var defaultConfig openai.ClientConfig
+ if apiEndpoint == "" {
+ startDockerImage()
+ defaultConfig = openai.DefaultConfig("")
+ defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
+ } else {
+		fmt.Println("Using API endpoint:", apiEndpoint)
+ defaultConfig = openai.DefaultConfig("")
+ defaultConfig.BaseURL = apiEndpoint
+ }
+
+ // Wait for API to be ready
+ client = openai.NewClientWithConfig(defaultConfig)
+
+ Eventually(func() error {
+ _, err := client.ListModels(context.TODO())
+ return err
+ }, "20m").ShouldNot(HaveOccurred())
+})
+
+var _ = AfterSuite(func() {
+ if resource != nil {
+ Expect(pool.Purge(resource)).To(Succeed())
+ }
+ //dat, err := os.ReadFile(resource.Container.LogPath)
+ //Expect(err).To(Not(HaveOccurred()))
+ //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
+ //fmt.Println(string(dat))
+})
+
+var _ = AfterEach(func() {
+ //Expect(dbClient.Clear()).To(Succeed())
+})
+
+func startDockerImage() {
p, err := dockertest.NewPool("")
Expect(err).To(Not(HaveOccurred()))
Expect(p.Client.Ping()).To(Succeed())
@@ -71,27 +101,4 @@ var _ = BeforeSuite(func() {
Expect(err).To(Not(HaveOccurred()))
resource = r
-
- defaultConfig := openai.DefaultConfig("")
- defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
-
- // Wait for API to be ready
- client = openai.NewClientWithConfig(defaultConfig)
-
- Eventually(func() error {
- _, err := client.ListModels(context.TODO())
- return err
- }, "20m").ShouldNot(HaveOccurred())
-})
-
-var _ = AfterSuite(func() {
- Expect(pool.Purge(resource)).To(Succeed())
- //dat, err := os.ReadFile(resource.Container.LogPath)
- //Expect(err).To(Not(HaveOccurred()))
- //Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
- //fmt.Println(string(dat))
-})
-
-var _ = AfterEach(func() {
- //Expect(dbClient.Clear()).To(Succeed())
-})
+}
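
With this change the e2e suite no longer insists on starting its own container: when LOCALAI_API_ENDPOINT is set, the OpenAI client is pointed straight at the existing deployment, and the Docker image is only started as a fallback. A minimal standalone sketch of the same selection pattern, using the go-openai client the suite already imports (the fallback port and the lack of an API key are assumptions matching the suite's defaults):

package main

import (
	"context"
	"fmt"
	"os"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	cfg := openai.DefaultConfig("") // LocalAI needs no API key by default
	if endpoint := os.Getenv("LOCALAI_API_ENDPOINT"); endpoint != "" {
		cfg.BaseURL = endpoint // target an existing deployment
	} else {
		cfg.BaseURL = "http://localhost:8080/v1" // assumed local fallback
	}
	client := openai.NewClientWithConfig(cfg)

	// Same readiness probe the suite uses: the API is up once /v1/models answers.
	models, err := client.ListModels(context.TODO())
	if err != nil {
		fmt.Fprintln(os.Stderr, "API not ready:", err)
		os.Exit(1)
	}
	fmt.Printf("LocalAI is up with %d models\n", len(models.Models))
}
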
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
index 03d9fda9..c52d789e 100644
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -51,7 +51,7 @@ var _ = Describe("E2E test", func() {
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
- Expect(resp.Data[0].URL).To(ContainSubstring("http://localhost:8080"), fmt.Sprint(resp.Data[0].URL))
+ Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL))
})
})
Context("embeddings", func() {
From 42a4c86dca03baad7597389ca30029ae4c32e7a2 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 26 Mar 2024 00:33:46 +0100
Subject: [PATCH 0040/2750] :arrow_up: Update ggerganov/whisper.cpp (#1896)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index c12ea8c0..59477f59 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3
+WHISPER_CPP_VERSION?=1558ec5a16cb2b2a0bf54815df1d41f83dc3815b
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 1395e505cd8f1cc90ce575602c7eb21706da6067 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 26 Mar 2024 00:34:10 +0100
Subject: [PATCH 0041/2750] :arrow_up: Update ggerganov/llama.cpp (#1897)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 59477f59..518287da 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a0e584defd8c16e7a51ab895f595df0448d710d0
+CPPLLAMA_VERSION?=b06c16ef9f81d84da520232c125d4d8a1d273736
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From e58410fa99996d9927b06d5d1cab0e072486edac Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 26 Mar 2024 18:45:25 +0100
Subject: [PATCH 0042/2750] feat(aio): add intel profile (#1901)
* feat(aio): add intel profile
* docs: clarify AIO images features
---
README.md | 4 ++
aio/cpu/embeddings.yaml | 8 +---
aio/cpu/image-gen.yaml | 11 ++++-
aio/cpu/vision.yaml | 9 ----
aio/entrypoint.sh | 4 +-
aio/gpu-8g/embeddings.yaml | 1 -
aio/gpu-8g/image-gen.yaml | 1 -
aio/gpu-8g/vision.yaml | 2 -
aio/intel/embeddings.yaml | 12 ++++++
aio/intel/image-gen.yaml | 20 +++++++++
aio/intel/speech-to-text.yaml | 18 ++++++++
aio/intel/text-to-speech.yaml | 15 +++++++
aio/intel/text-to-text.yaml | 51 +++++++++++++++++++++++
aio/intel/vision.yaml | 35 ++++++++++++++++
docs/content/docs/overview.md | 1 -
docs/content/docs/reference/aio-images.md | 9 ++++
16 files changed, 178 insertions(+), 23 deletions(-)
create mode 100644 aio/intel/embeddings.yaml
create mode 100644 aio/intel/image-gen.yaml
create mode 100644 aio/intel/speech-to-text.yaml
create mode 100644 aio/intel/text-to-speech.yaml
create mode 100644 aio/intel/text-to-text.yaml
create mode 100644 aio/intel/vision.yaml
diff --git a/README.md b/README.md
index 7ba96ad5..8cf15d5a 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,10 @@
+> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
+>
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs and generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU.
diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml
index bdee079c..8576746f 100644
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,11 +1,5 @@
-backend: bert-embeddings
-embeddings: true
-f16: true
-
-gpu_layers: 90
-mmap: true
name: text-embedding-ada-002
-
+backend: bert-embeddings
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml
index 3b9c2eec..9de88a3f 100644
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -50,4 +50,13 @@ download_files:
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
- uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
\ No newline at end of file
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
+
+usage: |
+ curl http://localhost:8080/v1/images/generations \
+ -H "Content-Type: application/json" \
+ -d '{
+      "prompt": "<positive prompt>|<negative prompt>",
+ "step": 25,
+ "size": "512x512"
+ }'
\ No newline at end of file
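
The usage block above mirrors the image-generation endpoint contract: a JSON POST with a prompt (positive and negative halves separated by "|"), a step count, and a size. A hedged client-side sketch in Go, with a placeholder prompt (the response shape is the one the e2e test asserts on, a data array of URLs):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// The prompt value is an assumed example; format is "<positive>|<negative>".
	payload, _ := json.Marshal(map[string]any{
		"prompt": "a cute baby sea otter|ugly, blurry",
		"step":   25,
		"size":   "512x512",
	})
	resp, err := http.Post("http://localhost:8080/v1/images/generations",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out struct {
		Data []struct {
			URL string `json:"url"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Data[0].URL) // path to the generated image
}
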
diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml
index 0777f715..3b466d37 100644
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,8 +1,6 @@
backend: llama-cpp
context_size: 4096
f16: true
-
-gpu_layers: 90
mmap: true
name: gpt-4-vision-preview
@@ -14,13 +12,6 @@ roles:
mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
- temperature: 0.2
- top_k: 40
- top_p: 0.95
- seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
template:
chat: |
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index 795cb86a..d04e5642 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -30,6 +30,7 @@ function detect_gpu() {
echo "Intel GPU detected"
if [ -d /opt/intel ]; then
GPU_ACCELERATION=true
+ GPU_VENDOR=intel
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
@@ -75,7 +76,8 @@ function detect_gpu_size() {
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
GPU_SIZE=gpu-8g
fi
-
+ elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
+ GPU_SIZE=intel
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
elif [ "$GPU_ACCELERATION" = true ]; then
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml
index 98b519d5..99a74ef7 100644
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,6 +1,5 @@
name: text-embedding-ada-002
backend: sentencetransformers
-embeddings: true
parameters:
model: all-MiniLM-L6-v2
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
index 53994ebb..0074aaf0 100644
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -20,7 +20,6 @@ usage: |
-H "Content-Type: application/json" \
-d '{
"prompt": "|",
- "model": "dreamshaper",
"step": 25,
"size": "512x512"
}'
\ No newline at end of file
diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml
index 02542503..db039279 100644
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -1,8 +1,6 @@
backend: llama-cpp
context_size: 4096
f16: true
-
-gpu_layers: 90
mmap: true
name: gpt-4-vision-preview
diff --git a/aio/intel/embeddings.yaml b/aio/intel/embeddings.yaml
new file mode 100644
index 00000000..99a74ef7
--- /dev/null
+++ b/aio/intel/embeddings.yaml
@@ -0,0 +1,12 @@
+name: text-embedding-ada-002
+backend: sentencetransformers
+parameters:
+ model: all-MiniLM-L6-v2
+
+usage: |
+ You can test this model with curl like this:
+
+ curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+ "input": "Your text string goes here",
+ "model": "text-embedding-ada-002"
+ }'
\ No newline at end of file
diff --git a/aio/intel/image-gen.yaml b/aio/intel/image-gen.yaml
new file mode 100644
index 00000000..eb724c92
--- /dev/null
+++ b/aio/intel/image-gen.yaml
@@ -0,0 +1,20 @@
+name: stablediffusion
+parameters:
+ model: runwayml/stable-diffusion-v1-5
+backend: diffusers
+step: 25
+f16: true
+diffusers:
+ pipeline_type: StableDiffusionPipeline
+ cuda: true
+ enable_parameters: "negative_prompt,num_inference_steps"
+ scheduler_type: "k_dpmpp_2m"
+
+usage: |
+ curl http://localhost:8080/v1/images/generations \
+ -H "Content-Type: application/json" \
+ -d '{
+      "prompt": "<positive prompt>|<negative prompt>",
+ "step": 25,
+ "size": "512x512"
+ }'
\ No newline at end of file
diff --git a/aio/intel/speech-to-text.yaml b/aio/intel/speech-to-text.yaml
new file mode 100644
index 00000000..77850d79
--- /dev/null
+++ b/aio/intel/speech-to-text.yaml
@@ -0,0 +1,18 @@
+name: whisper-1
+backend: whisper
+parameters:
+ model: ggml-whisper-base.bin
+
+usage: |
+ ## example audio file
+ wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+
+ ## Send the example audio file to the transcriptions endpoint
+ curl http://localhost:8080/v1/audio/transcriptions \
+ -H "Content-Type: multipart/form-data" \
+ -F file="@$PWD/gb1.ogg" -F model="whisper-1"
+
+download_files:
+- filename: "ggml-whisper-base.bin"
+ sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+ uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
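
The whisper usage above sends the audio file as multipart form data with a file part and a model field. The same request expressed in Go, for reference (the file path is a placeholder taken from the example wget above):

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	f, err := os.Open("gb1.ogg") // placeholder: the example file downloaded above
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Build the multipart body with the same field names the curl example uses.
	var body bytes.Buffer
	w := multipart.NewWriter(&body)
	part, _ := w.CreateFormFile("file", "gb1.ogg")
	io.Copy(part, f)
	w.WriteField("model", "whisper-1")
	w.Close()

	resp, err := http.Post("http://localhost:8080/v1/audio/transcriptions",
		w.FormDataContentType(), &body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // JSON transcription result
}
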
diff --git a/aio/intel/text-to-speech.yaml b/aio/intel/text-to-speech.yaml
new file mode 100644
index 00000000..8d875a29
--- /dev/null
+++ b/aio/intel/text-to-speech.yaml
@@ -0,0 +1,15 @@
+name: tts-1
+download_files:
+ - filename: voice-en-us-amy-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+
+parameters:
+ model: en-us-amy-low.onnx
+
+usage: |
+ To test if this model works as expected, you can use the following curl command:
+
+ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
+ "model":"tts-1",
+ "input": "Hi, this is a test."
+ }'
\ No newline at end of file
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
new file mode 100644
index 00000000..ef36b562
--- /dev/null
+++ b/aio/intel/text-to-text.yaml
@@ -0,0 +1,51 @@
+name: gpt-4
+mmap: false
+f16: false
+parameters:
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+roles:
+ assistant_function_call: assistant
+ function: tool
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
+    {{ if eq .RoleName "function" }}<tool_response>{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
+    {{ if eq .RoleName "function" }}</tool_response>{{end}}
+ <|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+    </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+stopwords:
+- <|im_end|>
+- <dummy32000>
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "gpt-4",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml
new file mode 100644
index 00000000..52843162
--- /dev/null
+++ b/aio/intel/vision.yaml
@@ -0,0 +1,35 @@
+backend: llama-cpp
+context_size: 4096
+mmap: false
+f16: false
+name: gpt-4-vision-preview
+
+roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+mmproj: llava-v1.6-7b-mmproj-f16.gguf
+parameters:
+ model: llava-v1.6-mistral-7b.Q5_K_M.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ seed: -1
+
+template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
+
+download_files:
+- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
+- filename: llava-v1.6-7b-mmproj-f16.gguf
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "gpt-4-vision-preview",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 40ec9e4f..3c3a397d 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -49,7 +49,6 @@ icon = "info"
-
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 9c569fb5..331892e9 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -7,6 +7,15 @@ weight = 26
All-In-One images come pre-configured with a set of models and backends to fully leverage almost the entire LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. The model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
+What you can find configured out of the box:
+
+- Image generation
+- Text generation
+- Text to audio
+- Audio transcription
+- Embeddings
+- GPT Vision
+
| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
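
Once one of these containers is running, a quick smoke test is to list the pre-configured models through the OpenAI-compatible API. A minimal sketch with the go-openai client used elsewhere in the test suite (the address assumes the container publishes port 8080):

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	cfg := openai.DefaultConfig("")
	cfg.BaseURL = "http://localhost:8080/v1" // assumed published port
	client := openai.NewClientWithConfig(cfg)

	models, err := client.ListModels(context.Background())
	if err != nil {
		panic(err)
	}
	// The AIO profiles above pre-configure gpt-4, gpt-4-vision-preview,
	// stablediffusion, whisper-1, tts-1 and text-embedding-ada-002.
	for _, m := range models.Models {
		fmt.Println(m.ID)
	}
}
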
From b7ffe6621962952e2a69a6caeb6224f00bcf377d Mon Sep 17 00:00:00 2001
From: "Sebastian.W"
Date: Wed, 27 Mar 2024 01:48:14 +0800
Subject: [PATCH 0043/2750] Enhance autogptq backend to support VL models
(#1860)
* Enhance autogptq backend to support VL models
* update dependencies for autogptq
* remove redundant auto-gptq dependency
* Convert base64 to image_url for Qwen-VL model
* implemented model inference for qwen-vl
* remove user prompt from generated answer
* fixed write image error
---------
Co-authored-by: Binghua Wu
---
backend/python/autogptq/autogptq.py | 56 ++++++++++++++++---
backend/python/autogptq/autogptq.yml | 13 ++++-
.../transformers/transformers-nvidia.yml | 9 ++-
.../transformers/transformers-rocm.yml | 6 +-
.../common-env/transformers/transformers.yml | 9 ++-
5 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py
index ffb37569..bbafdd92 100755
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
@@ -5,12 +5,14 @@ import signal
import sys
import os
import time
+import base64
import grpc
import backend_pb2
import backend_pb2_grpc
+
from auto_gptq import AutoGPTQForCausalLM
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import TextGenerationPipeline
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -28,9 +30,19 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Device != "":
device = request.Device
- tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer)
+ # support loading local model files
+ model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
+ tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
- model = AutoGPTQForCausalLM.from_quantized(request.Model,
+ # support model `Qwen/Qwen-VL-Chat-Int4`
+ if "qwen-vl" in request.Model.lower():
+ self.model_name = "Qwen-VL-Chat"
+ model = AutoModelForCausalLM.from_pretrained(model_path,
+ trust_remote_code=request.TrustRemoteCode,
+ use_triton=request.UseTriton,
+ device_map="auto").eval()
+        else:
+            # remember the model name so recompile_vl_prompt can branch on it
+            self.model_name = request.Model
+ model = AutoGPTQForCausalLM.from_quantized(model_path,
model_basename=request.ModelBaseName,
use_safetensors=True,
trust_remote_code=request.TrustRemoteCode,
@@ -55,6 +67,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.TopP != 0.0:
top_p = request.TopP
+
+ prompt_images = self.recompile_vl_prompt(request)
+ compiled_prompt = prompt_images[0]
+ print(f"Prompt: {compiled_prompt}", file=sys.stderr)
+
# Implement Predict RPC
pipeline = TextGenerationPipeline(
model=self.model,
@@ -64,10 +81,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_p=top_p,
repetition_penalty=penalty,
)
- t = pipeline(request.Prompt)[0]["generated_text"]
- # Remove prompt from response if present
- if request.Prompt in t:
- t = t.replace(request.Prompt, "")
+ t = pipeline(compiled_prompt)[0]["generated_text"]
+ print(f"generated_text: {t}", file=sys.stderr)
+
+ if compiled_prompt in t:
+ t = t.replace(compiled_prompt, "")
+        # housekeeping: remove the image files from the /tmp folder
+ for img_path in prompt_images[1]:
+ try:
+ os.remove(img_path)
+ except Exception as e:
+ print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
@@ -78,6 +102,24 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# Not implemented yet
return self.Predict(request, context)
+ def recompile_vl_prompt(self, request):
+ prompt = request.Prompt
+ image_paths = []
+
+ if "qwen-vl" in self.model_name.lower():
+ # request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
+ # Then, save the image file paths to an array "image_paths".
+ # read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
+ for i, img in enumerate(request.Images):
+ timestamp = str(int(time.time() * 1000)) # Generate timestamp
+                img_path = f"/tmp/vl-{timestamp}-{i}.jpg" # timestamp plus index keeps filenames unique within the same millisecond
+ with open(img_path, "wb") as f:
+ f.write(base64.b64decode(img))
+ image_paths.append(img_path)
+ prompt = prompt.replace(f"[img-{i}]", " " + img_path + ",")
+ else:
+ prompt = request.Prompt
+ return (prompt, image_paths)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
diff --git a/backend/python/autogptq/autogptq.yml b/backend/python/autogptq/autogptq.yml
index 19b8e41d..d22b354e 100644
--- a/backend/python/autogptq/autogptq.yml
+++ b/backend/python/autogptq/autogptq.yml
@@ -1,3 +1,7 @@
+####
+# Attention! This file is abandoned.
+# Please use the ../common-env/transformers/transformers.yml file to manage dependencies.
+###
name: autogptq
channels:
- defaults
@@ -24,12 +28,12 @@ dependencies:
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- - accelerate==0.23.0
+ - accelerate==0.27.0
- aiohttp==3.8.5
- aiosignal==1.3.1
- async-timeout==4.0.3
- attrs==23.1.0
- - auto-gptq==0.4.2
+ - auto-gptq==0.7.1
- certifi==2023.7.22
- charset-normalizer==3.3.0
- datasets==2.14.5
@@ -59,6 +63,7 @@ dependencies:
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
+ - optimum==1.17.1
- packaging==23.2
- pandas==2.1.1
- peft==0.5.0
@@ -75,9 +80,11 @@ dependencies:
- six==1.16.0
- sympy==1.12
- tokenizers==0.14.0
- - torch==2.1.0
- tqdm==4.66.1
+ - torch==2.2.1
+ - torchvision==0.17.1
- transformers==4.34.0
+ - transformers_stream_generator==0.0.5
- triton==2.1.0
- typing-extensions==4.8.0
- tzdata==2023.3
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml
index 7daafe51..55361234 100644
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -24,10 +24,11 @@ dependencies:
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- - accelerate==0.23.0
+ - accelerate==0.27.0
- aiohttp==3.8.5
- aiosignal==1.3.1
- async-timeout==4.0.3
+ - auto-gptq==0.7.1
- attrs==23.1.0
- bark==0.1.5
- bitsandbytes==0.43.0
@@ -69,6 +70,7 @@ dependencies:
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
+ - optimum==1.17.1
- packaging==23.2
- pandas
- peft==0.5.0
@@ -87,7 +89,8 @@ dependencies:
- six==1.16.0
- sympy==1.12
- tokenizers
- - torch==2.1.2
+ - torch==2.2.1
+ - torchvision==0.17.1
- torchaudio==2.1.2
- tqdm==4.66.1
- triton==2.1.0
@@ -95,7 +98,6 @@ dependencies:
- tzdata==2023.3
- urllib3==1.26.17
- xxhash==3.4.1
- - auto-gptq==0.6.0
- yarl==1.9.2
- soundfile
- langid
@@ -116,5 +118,6 @@ dependencies:
- vocos
- vllm==0.3.2
- transformers>=4.38.2 # Updated Version
+ - transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml
index 5c18d301..fa245bf4 100644
--- a/backend/python/common-env/transformers/transformers-rocm.yml
+++ b/backend/python/common-env/transformers/transformers-rocm.yml
@@ -26,7 +26,8 @@ dependencies:
- pip:
- --pre
- --extra-index-url https://download.pytorch.org/whl/nightly/
- - accelerate==0.23.0
+ - accelerate==0.27.0
+ - auto-gptq==0.7.1
- aiohttp==3.8.5
- aiosignal==1.3.1
- async-timeout==4.0.3
@@ -82,7 +83,6 @@ dependencies:
- triton==2.1.0
- typing-extensions==4.8.0
- tzdata==2023.3
- - auto-gptq==0.6.0
- urllib3==1.26.17
- xxhash==3.4.1
- yarl==1.9.2
@@ -90,6 +90,7 @@ dependencies:
- langid
- wget
- unidecode
+ - optimum==1.17.1
- pyopenjtalk-prebuilt
- pypinyin
- inflect
@@ -105,5 +106,6 @@ dependencies:
- vocos
- vllm==0.3.2
- transformers>=4.38.2 # Updated Version
+ - transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index 5726abaf..bdf8c36f 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -24,9 +24,10 @@ dependencies:
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- - accelerate==0.23.0
+ - accelerate==0.27.0
- aiohttp==3.8.5
- aiosignal==1.3.1
+ - auto-gptq==0.7.1
- async-timeout==4.0.3
- attrs==23.1.0
- bark==0.1.5
@@ -56,6 +57,7 @@ dependencies:
- multiprocess==0.70.15
- networkx
- numpy==1.26.0
+ - optimum==1.17.1
- packaging==23.2
- pandas
- peft==0.5.0
@@ -74,13 +76,13 @@ dependencies:
- six==1.16.0
- sympy==1.12
- tokenizers
- - torch==2.1.2
+ - torch==2.2.1
+ - torchvision==0.17.1
- torchaudio==2.1.2
- tqdm==4.66.1
- triton==2.1.0
- typing-extensions==4.8.0
- tzdata==2023.3
- - auto-gptq==0.6.0
- urllib3==1.26.17
- xxhash==3.4.1
- yarl==1.9.2
@@ -103,5 +105,6 @@ dependencies:
- vocos
- vllm==0.3.2
- transformers>=4.38.2 # Updated Version
+ - transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
From 2d7913b3bef9d6135510572e660e72dfbda29050 Mon Sep 17 00:00:00 2001
From: Steven Christou <1302212+christ66@users.noreply.github.com>
Date: Tue, 26 Mar 2024 10:54:35 -0700
Subject: [PATCH 0044/2750] feat(assistant): Assistant and AssistantFiles api
(#1803)
* Initial implementation of assistants api
* Move load/save configs to utils
* Save assistant and assistantfiles config to disk.
* Add tests for assistant api
* Fix models path spelling mistake.
* Remove personal go.mod information
---------
Co-authored-by: Ettore Di Giacinto
---
core/config/application_config.go | 7 +
core/http/api.go | 34 +-
core/http/endpoints/openai/assistant.go | 515 +++++++++++++++++++
core/http/endpoints/openai/assistant_test.go | 456 ++++++++++++++++
core/http/endpoints/openai/files.go | 64 +--
core/http/endpoints/openai/files_test.go | 45 +-
main.go | 7 +
pkg/utils/config.go | 41 ++
8 files changed, 1108 insertions(+), 61 deletions(-)
create mode 100644 core/http/endpoints/openai/assistant.go
create mode 100644 core/http/endpoints/openai/assistant_test.go
create mode 100644 pkg/utils/config.go
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 03242c3c..c2d4e13a 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -20,6 +20,7 @@ type ApplicationConfig struct {
ImageDir string
AudioDir string
UploadDir string
+ ConfigsDir string
CORS bool
PreloadJSONModels string
PreloadModelsFromPath string
@@ -252,6 +253,12 @@ func WithUploadDir(uploadDir string) AppOption {
}
}
+func WithConfigsDir(configsDir string) AppOption {
+ return func(o *ApplicationConfig) {
+ o.ConfigsDir = configsDir
+ }
+}
+
func WithApiKeys(apiKeys []string) AppOption {
return func(o *ApplicationConfig) {
o.ApiKeys = apiKeys
diff --git a/core/http/api.go b/core/http/api.go
index 039e835b..de0a4939 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -3,6 +3,7 @@ package http
import (
"encoding/json"
"errors"
+ "github.com/go-skynet/LocalAI/pkg/utils"
"os"
"strings"
@@ -155,8 +156,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}{Version: internal.PrintableVersion()})
})
- // Load upload json
- openai.LoadUploadConfig(appConfig.UploadDir)
+ // Make sure directories exists
+ os.MkdirAll(appConfig.ImageDir, 0755)
+ os.MkdirAll(appConfig.AudioDir, 0755)
+ os.MkdirAll(appConfig.UploadDir, 0755)
+ os.MkdirAll(appConfig.ConfigsDir, 0755)
+ os.MkdirAll(appConfig.ModelPath, 0755)
+
+ // Load config jsons
+ utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
+ utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
+ utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
@@ -189,6 +199,26 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+ // assistant
+ app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+
// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
new file mode 100644
index 00000000..0e0d8a99
--- /dev/null
+++ b/core/http/endpoints/openai/assistant.go
@@ -0,0 +1,515 @@
+package openai
+
+import (
+ "fmt"
+ "github.com/go-skynet/LocalAI/core/config"
+ model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/gofiber/fiber/v2"
+ "github.com/rs/zerolog/log"
+ "net/http"
+ "sort"
+ "strconv"
+ "strings"
+ "sync/atomic"
+ "time"
+)
+
+// ToolType defines a type for tool options
+type ToolType string
+
+const (
+ CodeInterpreter ToolType = "code_interpreter"
+ Retrieval ToolType = "retrieval"
+ Function ToolType = "function"
+
+ MaxCharacterInstructions = 32768
+ MaxCharacterDescription = 512
+ MaxCharacterName = 256
+ MaxToolsSize = 128
+ MaxFileIdSize = 20
+ MaxCharacterMetadataKey = 64
+ MaxCharacterMetadataValue = 512
+)
+
+type Tool struct {
+ Type ToolType `json:"type"`
+}
+
+// Assistant represents the structure of an assistant object from the OpenAI API.
+type Assistant struct {
+ ID string `json:"id"` // The unique identifier of the assistant.
+ Object string `json:"object"` // Object type, which is "assistant".
+ Created int64 `json:"created"` // The time at which the assistant was created.
+ Model string `json:"model"` // The model ID used by the assistant.
+ Name string `json:"name,omitempty"` // The name of the assistant.
+ Description string `json:"description,omitempty"` // The description of the assistant.
+ Instructions string `json:"instructions,omitempty"` // The system instructions that the assistant uses.
+ Tools []Tool `json:"tools,omitempty"` // A list of tools enabled on the assistant.
+ FileIDs []string `json:"file_ids,omitempty"` // A list of file IDs attached to this assistant.
+ Metadata map[string]string `json:"metadata,omitempty"` // Set of key-value pairs attached to the assistant.
+}
+
+var (
+ Assistants = []Assistant{} // better to return empty array instead of "null"
+ AssistantsConfigFile = "assistants.json"
+)
+
+type AssistantRequest struct {
+ Model string `json:"model"`
+ Name string `json:"name,omitempty"`
+ Description string `json:"description,omitempty"`
+ Instructions string `json:"instructions,omitempty"`
+ Tools []Tool `json:"tools,omitempty"`
+ FileIDs []string `json:"file_ids,omitempty"`
+ Metadata map[string]string `json:"metadata,omitempty"`
+}
+
+func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ request := new(AssistantRequest)
+ if err := c.BodyParser(request); err != nil {
+			log.Warn().Err(err).Msg("Unable to parse AssistantRequest")
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
+ }
+
+ if !modelExists(ml, request.Model) {
+ log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
+ return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found")
+ }
+
+ if request.Tools == nil {
+ request.Tools = []Tool{}
+ }
+
+ if request.FileIDs == nil {
+ request.FileIDs = []string{}
+ }
+
+ if request.Metadata == nil {
+ request.Metadata = make(map[string]string)
+ }
+
+ id := "asst_" + strconv.FormatInt(generateRandomID(), 10)
+
+ assistant := Assistant{
+ ID: id,
+ Object: "assistant",
+ Created: time.Now().Unix(),
+ Model: request.Model,
+ Name: request.Name,
+ Description: request.Description,
+ Instructions: request.Instructions,
+ Tools: request.Tools,
+ FileIDs: request.FileIDs,
+ Metadata: request.Metadata,
+ }
+
+ Assistants = append(Assistants, assistant)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
+ return c.Status(fiber.StatusOK).JSON(assistant)
+ }
+}
+
+var currentId int64 = 0
+
+func generateRandomID() int64 {
+	atomic.AddInt64(&currentId, 1)
+ return currentId
+}
+
+func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+		// Note: this assignment does not copy the slice, so the sort below
+		// also reorders the global Assistants slice through the shared backing array.
+		returnAssistants := Assistants
+ // Parse query parameters
+ limitQuery := c.Query("limit", "20")
+ orderQuery := c.Query("order", "desc")
+ afterQuery := c.Query("after")
+ beforeQuery := c.Query("before")
+
+ // Convert string limit to integer
+ limit, err := strconv.Atoi(limitQuery)
+ if err != nil {
+ return c.Status(http.StatusBadRequest).SendString(fmt.Sprintf("Invalid limit query value: %s", limitQuery))
+ }
+
+ // Sort assistants
+ sort.SliceStable(returnAssistants, func(i, j int) bool {
+ if orderQuery == "asc" {
+ return returnAssistants[i].Created < returnAssistants[j].Created
+ }
+ return returnAssistants[i].Created > returnAssistants[j].Created
+ })
+
+ // After and before cursors
+ if afterQuery != "" {
+ returnAssistants = filterAssistantsAfterID(returnAssistants, afterQuery)
+ }
+ if beforeQuery != "" {
+ returnAssistants = filterAssistantsBeforeID(returnAssistants, beforeQuery)
+ }
+
+ // Apply limit
+ if limit < len(returnAssistants) {
+ returnAssistants = returnAssistants[:limit]
+ }
+
+ return c.JSON(returnAssistants)
+ }
+}
+
+// filterAssistantsBeforeID keeps only the assistants whose numeric ID is strictly
+// smaller than the given ID. It assumes the assistants are already sorted.
+func filterAssistantsBeforeID(assistants []Assistant, id string) []Assistant {
+ idInt, err := strconv.Atoi(id)
+ if err != nil {
+ return assistants // Return original slice if invalid id format is provided
+ }
+
+ var filteredAssistants []Assistant
+
+ for _, assistant := range assistants {
+ aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_"))
+ if err != nil {
+ continue // Skip if invalid id in assistant
+ }
+
+ if aid < idInt {
+ filteredAssistants = append(filteredAssistants, assistant)
+ }
+ }
+
+ return filteredAssistants
+}
+
+// filterAssistantsAfterID keeps only the assistants whose numeric ID is strictly
+// greater than the given ID. It assumes the assistants are already sorted.
+func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
+ idInt, err := strconv.Atoi(id)
+ if err != nil {
+ return assistants // Return original slice if invalid id format is provided
+ }
+
+ var filteredAssistants []Assistant
+
+ for _, assistant := range assistants {
+ aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_"))
+ if err != nil {
+ continue // Skip if invalid id in assistant
+ }
+
+ if aid > idInt {
+ filteredAssistants = append(filteredAssistants, assistant)
+ }
+ }
+
+ return filteredAssistants
+}
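
Note that the after/before cursors operate on the numeric suffix of the "asst_" IDs rather than on creation time: after keeps strictly larger IDs, before strictly smaller ones, and limit then truncates the sorted slice. A self-contained illustration of the after semantics (filterAfter re-implements the filter above for demonstration only):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// filterAfter mirrors filterAssistantsAfterID: keep entries whose
// numeric "asst_" suffix is strictly greater than the cursor.
func filterAfter(ids []string, cursor int) []string {
	var kept []string
	for _, id := range ids {
		n, err := strconv.Atoi(strings.TrimPrefix(id, "asst_"))
		if err != nil {
			continue // skip malformed IDs, as the endpoint does
		}
		if n > cursor {
			kept = append(kept, id)
		}
	}
	return kept
}

func main() {
	ids := []string{"asst_4", "asst_3", "asst_2", "asst_1"} // already sorted desc
	fmt.Println(filterAfter(ids, 2))                        // [asst_4 asst_3]
}
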
+
+func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
+ found = false
+ models, err := ml.ListModels()
+ if err != nil {
+ return
+ }
+
+ for _, model := range models {
+ if model == modelName {
+ found = true
+ return
+ }
+ }
+ return
+}
+
+func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ type DeleteAssistantResponse struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Deleted bool `json:"deleted"`
+ }
+
+ return func(c *fiber.Ctx) error {
+ assistantID := c.Params("assistant_id")
+ if assistantID == "" {
+ return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
+ }
+
+ for i, assistant := range Assistants {
+ if assistant.ID == assistantID {
+ Assistants = append(Assistants[:i], Assistants[i+1:]...)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
+ return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{
+ ID: assistantID,
+ Object: "assistant.deleted",
+ Deleted: true,
+ })
+ }
+ }
+
+ log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
+ return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{
+ ID: assistantID,
+ Object: "assistant.deleted",
+ Deleted: false,
+ })
+ }
+}
+
+func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ assistantID := c.Params("assistant_id")
+ if assistantID == "" {
+ return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
+ }
+
+ for _, assistant := range Assistants {
+ if assistant.ID == assistantID {
+ return c.Status(fiber.StatusOK).JSON(assistant)
+ }
+ }
+
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))
+ }
+}
+
+type AssistantFile struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ CreatedAt int64 `json:"created_at"`
+ AssistantID string `json:"assistant_id"`
+}
+
+var (
+ AssistantFiles []AssistantFile
+ AssistantsFileConfigFile = "assistantsFile.json"
+)
+
+type AssistantFileRequest struct {
+ FileID string `json:"file_id"`
+}
+
+type DeleteAssistantFileResponse struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Deleted bool `json:"deleted"`
+}
+
+func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ request := new(AssistantFileRequest)
+ if err := c.BodyParser(request); err != nil {
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
+ }
+
+ assistantID := c.Params("assistant_id")
+ if assistantID == "" {
+ return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
+ }
+
+		for i, assistant := range Assistants {
+ if assistant.ID == assistantID {
+ if len(assistant.FileIDs) > MaxFileIdSize {
+ return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("Max files %d for assistant %s reached.", MaxFileIdSize, assistant.Name))
+ }
+
+ for _, file := range UploadedFiles {
+ if file.ID == request.FileID {
+					Assistants[i].FileIDs = append(Assistants[i].FileIDs, request.FileID)
+ assistantFile := AssistantFile{
+ ID: file.ID,
+ Object: "assistant.file",
+ CreatedAt: time.Now().Unix(),
+ AssistantID: assistant.ID,
+ }
+ AssistantFiles = append(AssistantFiles, assistantFile)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
+ return c.Status(fiber.StatusOK).JSON(assistantFile)
+ }
+ }
+
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID))
+ }
+ }
+
+		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))
+ }
+}
+
+func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ type ListAssistantFiles struct {
+ Data []File
+ Object string
+ }
+
+ return func(c *fiber.Ctx) error {
+ assistantID := c.Params("assistant_id")
+ if assistantID == "" {
+ return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
+ }
+
+ limitQuery := c.Query("limit", "20")
+ order := c.Query("order", "desc")
+ limit, err := strconv.Atoi(limitQuery)
+ if err != nil || limit < 1 || limit > 100 {
+ limit = 20 // Default to 20 if there's an error or the limit is out of bounds
+ }
+
+ // Sort files by CreatedAt depending on the order query parameter
+ if order == "asc" {
+ sort.Slice(AssistantFiles, func(i, j int) bool {
+ return AssistantFiles[i].CreatedAt < AssistantFiles[j].CreatedAt
+ })
+ } else { // default to "desc"
+ sort.Slice(AssistantFiles, func(i, j int) bool {
+ return AssistantFiles[i].CreatedAt > AssistantFiles[j].CreatedAt
+ })
+ }
+
+ // Limit the number of files returned
+ var limitedFiles []AssistantFile
+ hasMore := false
+ if len(AssistantFiles) > limit {
+ hasMore = true
+ limitedFiles = AssistantFiles[:limit]
+ } else {
+ limitedFiles = AssistantFiles
+ }
+
+ response := map[string]interface{}{
+ "object": "list",
+ "data": limitedFiles,
+ "first_id": func() string {
+ if len(limitedFiles) > 0 {
+ return limitedFiles[0].ID
+ }
+ return ""
+ }(),
+ "last_id": func() string {
+ if len(limitedFiles) > 0 {
+ return limitedFiles[len(limitedFiles)-1].ID
+ }
+ return ""
+ }(),
+ "has_more": hasMore,
+ }
+
+ return c.Status(fiber.StatusOK).JSON(response)
+ }
+}
+
+func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ request := new(AssistantRequest)
+ if err := c.BodyParser(request); err != nil {
+			log.Warn().Err(err).Msg("Unable to parse AssistantRequest")
+ return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
+ }
+
+ assistantID := c.Params("assistant_id")
+ if assistantID == "" {
+ return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
+ }
+
+ for i, assistant := range Assistants {
+ if assistant.ID == assistantID {
+ newAssistant := Assistant{
+ ID: assistantID,
+ Object: assistant.Object,
+ Created: assistant.Created,
+ Model: request.Model,
+ Name: request.Name,
+ Description: request.Description,
+ Instructions: request.Instructions,
+ Tools: request.Tools,
+ FileIDs: request.FileIDs, // todo: should probably verify fileids exist
+ Metadata: request.Metadata,
+ }
+
+ // Remove old one and replace with new one
+ Assistants = append(Assistants[:i], Assistants[i+1:]...)
+ Assistants = append(Assistants, newAssistant)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
+ return c.Status(fiber.StatusOK).JSON(newAssistant)
+ }
+ }
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))
+ }
+}
+
+func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ assistantID := c.Params("assistant_id")
+ fileId := c.Params("file_id")
+		if assistantID == "" || fileId == "" {
+			return c.Status(fiber.StatusBadRequest).SendString("parameters assistant_id and file_id are required")
+		}
+ // First remove file from assistant
+ for i, assistant := range Assistants {
+ if assistant.ID == assistantID {
+			for j, assistantFileId := range assistant.FileIDs {
+				if assistantFileId == fileId {
+ Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
+
+ // Check if the file exists in the assistantFiles slice
+ for i, assistantFile := range AssistantFiles {
+ if assistantFile.ID == fileId {
+ // Remove the file from the assistantFiles slice
+ AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
+ return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
+ ID: fileId,
+ Object: "assistant.file.deleted",
+ Deleted: true,
+ })
+ }
+ }
+ }
+ }
+
+			log.Warn().Msgf("Unable to locate file_id: %s in assistant: %s. Continuing to delete assistant file.", fileId, assistantID)
+ for i, assistantFile := range AssistantFiles {
+ if assistantFile.AssistantID == assistantID {
+
+ AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
+
+ return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
+ ID: fileId,
+ Object: "assistant.file.deleted",
+ Deleted: true,
+ })
+ }
+ }
+ }
+ }
+ log.Warn().Msgf("Unable to find assistant: %s", assistantID)
+
+ return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
+ ID: fileId,
+ Object: "assistant.file.deleted",
+ Deleted: false,
+ })
+ }
+}
+
+func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ assistantID := c.Params("assistant_id")
+ fileId := c.Params("file_id")
+		if assistantID == "" || fileId == "" {
+			return c.Status(fiber.StatusBadRequest).SendString("parameters assistant_id and file_id are required")
+		}
+
+ for _, assistantFile := range AssistantFiles {
+ if assistantFile.AssistantID == assistantID {
+ if assistantFile.ID == fileId {
+ return c.Status(fiber.StatusOK).JSON(assistantFile)
+ }
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId))
+ }
+ }
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID))
+ }
+}
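
Attaching a file follows the same flow the handlers above implement: the file is first uploaded through the files endpoints, then linked to an assistant by ID. A client-side sketch for reference (both IDs are placeholders; the response fields match the AssistantFile JSON tags above):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// "asst_1" and "file-abc123" are placeholders: the assistant must already
	// exist, and the file must have been uploaded through POST /v1/files.
	payload, _ := json.Marshal(map[string]string{"file_id": "file-abc123"})
	resp, err := http.Post("http://localhost:8080/v1/assistants/asst_1/files",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var af struct {
		ID          string `json:"id"`
		AssistantID string `json:"assistant_id"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&af); err != nil {
		panic(err)
	}
	fmt.Printf("attached file %s to assistant %s\n", af.ID, af.AssistantID)
}
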
diff --git a/core/http/endpoints/openai/assistant_test.go b/core/http/endpoints/openai/assistant_test.go
new file mode 100644
index 00000000..bdc41dda
--- /dev/null
+++ b/core/http/endpoints/openai/assistant_test.go
@@ -0,0 +1,456 @@
+package openai
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+ "github.com/stretchr/testify/assert"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+)
+
+var configsDir string = "/tmp/localai/configs"
+
+type MockLoader struct {
+ models []string
+}
+
+func tearDown() func() {
+ return func() {
+ UploadedFiles = []File{}
+ Assistants = []Assistant{}
+ AssistantFiles = []AssistantFile{}
+ _ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
+ _ = os.Remove(filepath.Join(configsDir, AssistantsFileConfigFile))
+ }
+}
+
+func TestAssistantEndpoints(t *testing.T) {
+ // Preparing the mocked objects
+ cl := &config.BackendConfigLoader{}
+ //configsDir := "/tmp/localai/configs"
+ modelPath := "/tmp/localai/model"
+ var ml = model.NewModelLoader(modelPath)
+
+ appConfig := &config.ApplicationConfig{
+ ConfigsDir: configsDir,
+ UploadLimitMB: 10,
+ UploadDir: "test_dir",
+ ModelPath: modelPath,
+ }
+
+ _ = os.RemoveAll(appConfig.ConfigsDir)
+ _ = os.MkdirAll(appConfig.ConfigsDir, 0755)
+ _ = os.MkdirAll(modelPath, 0755)
+ os.Create(filepath.Join(modelPath, "ggml-gpt4all-j"))
+
+ app := fiber.New(fiber.Config{
+ BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
+ })
+
+ // Create a Test Server
+ app.Get("/assistants", ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Post("/assistants", CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id", DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id", GetAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id", ModifyAssistantEndpoint(cl, ml, appConfig))
+
+ app.Post("/files", UploadFilesEndpoint(cl, appConfig))
+ app.Get("/assistants/:assistant_id/files", ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id/files", CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", GetAssistantFileEndpoint(cl, ml, appConfig))
+
+ t.Run("CreateAssistantEndpoint", func(t *testing.T) {
+ t.Cleanup(tearDown())
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "3.5-turbo",
+ Description: "Test Assistant",
+ Instructions: "You are computer science teacher answering student questions",
+ Tools: []Tool{{Type: Function}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ resultAssistant, resp, err := createAssistant(app, *ar)
+ assert.NoError(t, err)
+ assert.Equal(t, fiber.StatusOK, resp.StatusCode)
+
+ assert.Equal(t, 1, len(Assistants))
+ //t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID}))
+
+ assert.Equal(t, ar.Name, resultAssistant.Name)
+ assert.Equal(t, ar.Model, resultAssistant.Model)
+ assert.Equal(t, ar.Tools, resultAssistant.Tools)
+ assert.Equal(t, ar.Description, resultAssistant.Description)
+ assert.Equal(t, ar.Instructions, resultAssistant.Instructions)
+ assert.Equal(t, ar.FileIDs, resultAssistant.FileIDs)
+ assert.Equal(t, ar.Metadata, resultAssistant.Metadata)
+ })
+
+ t.Run("ListAssistantsEndpoint", func(t *testing.T) {
+ var ids []string
+ var resultAssistant []Assistant
+ for i := 0; i < 4; i++ {
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: fmt.Sprintf("3.5-turbo-%d", i),
+ Description: fmt.Sprintf("Test Assistant - %d", i),
+ Instructions: fmt.Sprintf("You are computer science teacher answering student questions - %d", i),
+ Tools: []Tool{{Type: Function}},
+ FileIDs: []string{"fid-1234"},
+ Metadata: map[string]string{"meta": "data"},
+ }
+
+ //var err error
+ ra, _, err := createAssistant(app, *ar)
+			// Sleep so consecutive assistants don't share the same creation timestamp.
+ time.Sleep(time.Second)
+ resultAssistant = append(resultAssistant, ra)
+ assert.NoError(t, err)
+ ids = append(ids, resultAssistant[i].ID)
+ }
+
+ t.Cleanup(cleanupAllAssistants(t, app, ids))
+
+ tests := []struct {
+ name string
+ reqURL string
+ expectedStatus int
+ expectedResult []Assistant
+ expectedStringResult string
+ }{
+ {
+ name: "Valid Usage - limit only",
+ reqURL: "/assistants?limit=2",
+ expectedStatus: http.StatusOK,
+ expectedResult: Assistants[:2], // Expecting the first two assistants
+ },
+ {
+ name: "Valid Usage - order asc",
+ reqURL: "/assistants?order=asc",
+ expectedStatus: http.StatusOK,
+ expectedResult: Assistants, // Expecting all assistants in ascending order
+ },
+ {
+ name: "Valid Usage - order desc",
+ reqURL: "/assistants?order=desc",
+ expectedStatus: http.StatusOK,
+ expectedResult: []Assistant{Assistants[3], Assistants[2], Assistants[1], Assistants[0]}, // Expecting all assistants in descending order
+ },
+ {
+ name: "Valid Usage - after specific ID",
+ reqURL: "/assistants?after=2",
+ expectedStatus: http.StatusOK,
+ // Note this is correct because it's put in descending order already
+ expectedResult: Assistants[:3], // Expecting assistants after (excluding) ID 2
+ },
+ {
+ name: "Valid Usage - before specific ID",
+ reqURL: "/assistants?before=4",
+ expectedStatus: http.StatusOK,
+ expectedResult: Assistants[2:], // Expecting assistants before (excluding) ID 3.
+ },
+ {
+ name: "Invalid Usage - non-integer limit",
+ reqURL: "/assistants?limit=two",
+ expectedStatus: http.StatusBadRequest,
+ expectedStringResult: "Invalid limit query value: two",
+ },
+ {
+ name: "Invalid Usage - non-existing id in after",
+ reqURL: "/assistants?after=100",
+ expectedStatus: http.StatusOK,
+ expectedResult: []Assistant(nil), // Expecting empty list as there are no IDs above 100
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ request := httptest.NewRequest(http.MethodGet, tt.reqURL, nil)
+ response, err := app.Test(request)
+ assert.NoError(t, err)
+ assert.Equal(t, tt.expectedStatus, response.StatusCode)
+ if tt.expectedStatus != fiber.StatusOK {
+ all, _ := ioutil.ReadAll(response.Body)
+ assert.Equal(t, tt.expectedStringResult, string(all))
+ } else {
+ var result []Assistant
+ err = json.NewDecoder(response.Body).Decode(&result)
+ assert.NoError(t, err)
+
+ assert.Equal(t, tt.expectedResult, result)
+ }
+ })
+ }
+ })
+
+ t.Run("DeleteAssistantEndpoint", func(t *testing.T) {
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "3.5-turbo",
+ Description: "Test Assistant",
+ Instructions: "You are computer science teacher answering student questions",
+ Tools: []Tool{{Type: Function}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ resultAssistant, _, err := createAssistant(app, *ar)
+ assert.NoError(t, err)
+
+ target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
+ deleteReq := httptest.NewRequest(http.MethodDelete, target, nil)
+ _, err = app.Test(deleteReq)
+ assert.NoError(t, err)
+ assert.Equal(t, 0, len(Assistants))
+ })
+
+ t.Run("GetAssistantEndpoint", func(t *testing.T) {
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "3.5-turbo",
+ Description: "Test Assistant",
+ Instructions: "You are computer science teacher answering student questions",
+ Tools: []Tool{{Type: Function}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ resultAssistant, _, err := createAssistant(app, *ar)
+ assert.NoError(t, err)
+ t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID}))
+
+ target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
+ request := httptest.NewRequest(http.MethodGet, target, nil)
+ response, err := app.Test(request)
+ assert.NoError(t, err)
+
+ var getAssistant Assistant
+ err = json.NewDecoder(response.Body).Decode(&getAssistant)
+ assert.NoError(t, err)
+
+ assert.Equal(t, resultAssistant.ID, getAssistant.ID)
+ })
+
+ t.Run("ModifyAssistantEndpoint", func(t *testing.T) {
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "3.5-turbo",
+ Description: "Test Assistant",
+ Instructions: "You are computer science teacher answering student questions",
+ Tools: []Tool{{Type: Function}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ resultAssistant, _, err := createAssistant(app, *ar)
+ assert.NoError(t, err)
+
+ modifiedAr := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "4.0-turbo",
+ Description: "Modified Test Assistant",
+ Instructions: "You are math teacher answering student questions",
+ Tools: []Tool{{Type: CodeInterpreter}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ modifiedArJson, err := json.Marshal(modifiedAr)
+ assert.NoError(t, err)
+
+ target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
+ request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(modifiedArJson)))
+ request.Header.Set(fiber.HeaderContentType, "application/json")
+
+ modifyResponse, err := app.Test(request)
+ assert.NoError(t, err)
+ var getAssistant Assistant
+ err = json.NewDecoder(modifyResponse.Body).Decode(&getAssistant)
+ assert.NoError(t, err)
+
+ t.Cleanup(cleanupAllAssistants(t, app, []string{getAssistant.ID}))
+
+ assert.Equal(t, resultAssistant.ID, getAssistant.ID) // IDs should match even if contents change
+ assert.Equal(t, modifiedAr.Tools, getAssistant.Tools)
+ assert.Equal(t, modifiedAr.Name, getAssistant.Name)
+ assert.Equal(t, modifiedAr.Instructions, getAssistant.Instructions)
+ assert.Equal(t, modifiedAr.Description, getAssistant.Description)
+ })
+
+ t.Run("CreateAssistantFileEndpoint", func(t *testing.T) {
+ t.Cleanup(tearDown())
+ file, assistant, err := createFileAndAssistant(t, app, appConfig)
+ assert.NoError(t, err)
+
+ afr := AssistantFileRequest{FileID: file.ID}
+ af, _, err := createAssistantFile(app, afr, assistant.ID)
+
+ assert.NoError(t, err)
+ assert.Equal(t, assistant.ID, af.AssistantID)
+ })
+ t.Run("ListAssistantFilesEndpoint", func(t *testing.T) {
+ t.Cleanup(tearDown())
+ file, assistant, err := createFileAndAssistant(t, app, appConfig)
+ assert.NoError(t, err)
+
+ afr := AssistantFileRequest{FileID: file.ID}
+ af, _, err := createAssistantFile(app, afr, assistant.ID)
+ assert.NoError(t, err)
+
+ assert.Equal(t, assistant.ID, af.AssistantID)
+ })
+ t.Run("GetAssistantFileEndpoint", func(t *testing.T) {
+ t.Cleanup(tearDown())
+ file, assistant, err := createFileAndAssistant(t, app, appConfig)
+ assert.NoError(t, err)
+
+ afr := AssistantFileRequest{FileID: file.ID}
+ af, _, err := createAssistantFile(app, afr, assistant.ID)
+ assert.NoError(t, err)
+ t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
+
+ target := fmt.Sprintf("/assistants/%s/files/%s", assistant.ID, file.ID)
+ request := httptest.NewRequest(http.MethodGet, target, nil)
+ response, err := app.Test(request)
+ assert.NoError(t, err)
+
+ var assistantFile AssistantFile
+ err = json.NewDecoder(response.Body).Decode(&assistantFile)
+ assert.NoError(t, err)
+
+ assert.Equal(t, af.ID, assistantFile.ID)
+ assert.Equal(t, af.AssistantID, assistantFile.AssistantID)
+ })
+ t.Run("DeleteAssistantFileEndpoint", func(t *testing.T) {
+ t.Cleanup(tearDown())
+ file, assistant, err := createFileAndAssistant(t, app, appConfig)
+ assert.NoError(t, err)
+
+ afr := AssistantFileRequest{FileID: file.ID}
+ af, _, err := createAssistantFile(app, afr, assistant.ID)
+ assert.NoError(t, err)
+
+ cleanupAssistantFile(t, app, af.ID, af.AssistantID)()
+
+ assert.Empty(t, AssistantFiles)
+ })
+
+}
+
+func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
+ ar := &AssistantRequest{
+ Model: "ggml-gpt4all-j",
+ Name: "3.5-turbo",
+ Description: "Test Assistant",
+ Instructions: "You are a computer science teacher answering student questions",
+ Tools: []Tool{{Type: Function}},
+ FileIDs: nil,
+ Metadata: nil,
+ }
+
+ assistant, _, err := createAssistant(app, *ar)
+ if err != nil {
+ return File{}, Assistant{}, err
+ }
+ t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
+
+ file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, o)
+ t.Cleanup(func() {
+ _, err := CallFilesDeleteEndpoint(t, app, file.ID)
+ assert.NoError(t, err)
+ })
+ return file, assistant, nil
+}
+
+func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
+ afrJson, err := json.Marshal(afr)
+ if err != nil {
+ return AssistantFile{}, nil, err
+ }
+
+ target := fmt.Sprintf("/assistants/%s/files", assistantId)
+ request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(afrJson)))
+ request.Header.Set(fiber.HeaderContentType, "application/json")
+ request.Header.Set("OpenAi-Beta", "assistants=v1")
+
+ resp, err := app.Test(request)
+ if err != nil {
+ return AssistantFile{}, resp, err
+ }
+
+ var assistantFile AssistantFile
+ err = json.NewDecoder(resp.Body).Decode(&assistantFile)
+ if err != nil {
+ return AssistantFile{}, resp, err
+ }
+
+ return assistantFile, resp, nil
+}
+
+func createAssistant(app *fiber.App, ar AssistantRequest) (Assistant, *http.Response, error) {
+ assistant, err := json.Marshal(ar)
+ if err != nil {
+ return Assistant{}, nil, err
+ }
+
+ request := httptest.NewRequest(http.MethodPost, "/assistants", strings.NewReader(string(assistant)))
+ request.Header.Set(fiber.HeaderContentType, "application/json")
+ request.Header.Set("OpenAi-Beta", "assistants=v1")
+
+ resp, err := app.Test(request)
+ if err != nil {
+ return Assistant{}, resp, err
+ }
+
+ bodyString, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return Assistant{}, resp, err
+ }
+
+ var resultAssistant Assistant
+ err = json.NewDecoder(strings.NewReader(string(bodyString))).Decode(&resultAssistant)
+ if err != nil {
+ return Assistant{}, resp, err
+ }
+
+ return resultAssistant, resp, nil
+}
+
+func cleanupAllAssistants(t *testing.T, app *fiber.App, ids []string) func() {
+ return func() {
+ for _, assistant := range ids {
+ target := fmt.Sprintf("/assistants/%s", assistant)
+ deleteReq := httptest.NewRequest(http.MethodDelete, target, nil)
+ _, err := app.Test(deleteReq)
+ if err != nil {
+ t.Fatalf("Failed to delete assistant %s: %v", assistant, err)
+ }
+ }
+ }
+}
+
+func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId string) func() {
+ return func() {
+ target := fmt.Sprintf("/assistants/%s/files/%s", assistantId, fileId)
+ request := httptest.NewRequest(http.MethodDelete, target, nil)
+ request.Header.Set(fiber.HeaderContentType, "application/json")
+ request.Header.Set("OpenAi-Beta", "assistants=v1")
+
+ resp, err := app.Test(request)
+ assert.NoError(t, err)
+
+ var dafr DeleteAssistantFileResponse
+ err = json.NewDecoder(resp.Body).Decode(&dafr)
+ assert.NoError(t, err)
+ assert.True(t, dafr.Deleted)
+ }
+}
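Worth noting in the tests above: helpers like `cleanupAllAssistants` and `cleanupAssistantFile` return a `func()` rather than deleting directly, so the result can be handed straight to `t.Cleanup` and run after the (sub)test finishes, pass or fail. A minimal standalone sketch of that factory-returns-closure pattern; `newTempResource` is hypothetical, not LocalAI code:

```go
package example

import "testing"

// newTempResource allocates a test fixture and returns a closure that
// tears it down; the closure captures everything it needs for cleanup.
func newTempResource(t *testing.T, id string) func() {
	t.Helper()
	// ... create the resource identified by id ...
	return func() {
		// ... delete the resource; t.Cleanup runs this even on failure ...
	}
}

func TestWithResource(t *testing.T) {
	t.Cleanup(newTempResource(t, "res-1"))
	// assertions against the live resource go here
}
```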
diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go
index 5cb8d7a9..add9aaa0 100644
--- a/core/http/endpoints/openai/files.go
+++ b/core/http/endpoints/openai/files.go
@@ -1,23 +1,22 @@
package openai
import (
- "encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
+ "sync/atomic"
"time"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
- "github.com/rs/zerolog/log"
)
-var uploadedFiles []File
+var UploadedFiles []File
-const uploadedFilesFile = "uploadedFiles.json"
+const UploadedFilesFile = "uploadedFiles.json"
// File represents the structure of a file object from the OpenAI API.
type File struct {
@@ -29,38 +28,6 @@ type File struct {
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
}
-func saveUploadConfig(uploadDir string) {
- file, err := json.MarshalIndent(uploadedFiles, "", " ")
- if err != nil {
- log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err)
- }
-
- err = os.WriteFile(filepath.Join(uploadDir, uploadedFilesFile), file, 0644)
- if err != nil {
- log.Error().Msgf("Failed to save uploadedFiles to file: %s", err)
- }
-}
-
-func LoadUploadConfig(uploadPath string) {
- uploadFilePath := filepath.Join(uploadPath, uploadedFilesFile)
-
- _, err := os.Stat(uploadFilePath)
- if os.IsNotExist(err) {
- log.Debug().Msgf("No uploadedFiles file found at %s", uploadFilePath)
- return
- }
-
- file, err := os.ReadFile(uploadFilePath)
- if err != nil {
- log.Error().Msgf("Failed to read file: %s", err)
- } else {
- err = json.Unmarshal(file, &uploadedFiles)
- if err != nil {
- log.Error().Msgf("Failed to JSON unmarshal the file into uploadedFiles: %s", err)
- }
- }
-}
-
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -95,7 +62,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
f := File{
- ID: fmt.Sprintf("file-%d", time.Now().Unix()),
+ ID: fmt.Sprintf("file-%d", getNextFileId()),
Object: "file",
Bytes: int(file.Size),
CreatedAt: time.Now(),
@@ -103,12 +70,19 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
Purpose: purpose,
}
- uploadedFiles = append(uploadedFiles, f)
- saveUploadConfig(appConfig.UploadDir)
+ UploadedFiles = append(UploadedFiles, f)
+ utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles)
return c.Status(fiber.StatusOK).JSON(f)
}
}
+var currentFileId int64 = 0
+
+// getNextFileId atomically increments the counter and returns the new
+// value, so concurrent uploads can never observe the same ID.
+func getNextFileId() int64 {
+ return atomic.AddInt64(&currentFileId, 1)
+}
+
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type ListFiles struct {
@@ -121,9 +95,9 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
purpose := c.Query("purpose")
if purpose == "" {
- listFiles.Data = uploadedFiles
+ listFiles.Data = UploadedFiles
} else {
- for _, f := range uploadedFiles {
+ for _, f := range UploadedFiles {
if purpose == f.Purpose {
listFiles.Data = append(listFiles.Data, f)
}
@@ -140,7 +114,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("file_id parameter is required")
}
- for _, f := range uploadedFiles {
+ for _, f := range UploadedFiles {
if id == f.ID {
return &f, nil
}
@@ -184,14 +158,14 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
// Remove upload from list
- for i, f := range uploadedFiles {
+ for i, f := range UploadedFiles {
if f.ID == file.ID {
- uploadedFiles = append(uploadedFiles[:i], uploadedFiles[i+1:]...)
+ UploadedFiles = append(UploadedFiles[:i], UploadedFiles[i+1:]...)
break
}
}
- saveUploadConfig(appConfig.UploadDir)
+ utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles)
return c.JSON(DeleteStatus{
Id: file.ID,
Object: "file",
diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go
index a036bd0d..e1c1011e 100644
--- a/core/http/endpoints/openai/files_test.go
+++ b/core/http/endpoints/openai/files_test.go
@@ -3,6 +3,7 @@ package openai
import (
"encoding/json"
"fmt"
+ "github.com/rs/zerolog/log"
"io"
"mime/multipart"
"net/http"
@@ -73,6 +74,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
t.Run("UploadFilesEndpoint file size exceeds limit", func(t *testing.T) {
+ t.Cleanup(tearDown())
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 11, option)
assert.NoError(t, err)
@@ -80,46 +82,54 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
assert.Contains(t, bodyToString(resp, t), "exceeds upload limit")
})
t.Run("UploadFilesEndpoint purpose not defined", func(t *testing.T) {
+ t.Cleanup(tearDown())
resp, _ := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "", 5, option)
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
assert.Contains(t, bodyToString(resp, t), "Purpose is not defined")
})
t.Run("UploadFilesEndpoint file already exists", func(t *testing.T) {
+ t.Cleanup(tearDown())
f1 := CallFilesUploadEndpointWithCleanup(t, app, "foo.txt", "file", "fine-tune", 5, option)
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 5, option)
fmt.Println(f1)
- fmt.Printf("ERror: %v", err)
+ fmt.Printf("ERror: %v\n", err)
+ fmt.Printf("resp: %+v\n", resp)
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
assert.Contains(t, bodyToString(resp, t), "File already exists")
})
t.Run("UploadFilesEndpoint file uploaded successfully", func(t *testing.T) {
+ t.Cleanup(tearDown())
file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
// Check if file exists in the disk
- filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName("test.txt"))
+ testName := strings.Split(t.Name(), "/")[1]
+ fileName := testName + "-test.txt"
+ filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName(fileName))
_, err := os.Stat(filePath)
assert.False(t, os.IsNotExist(err))
assert.Equal(t, file.Bytes, 5242880)
assert.NotEmpty(t, file.CreatedAt)
- assert.Equal(t, file.Filename, "test.txt")
+ assert.Equal(t, file.Filename, fileName)
assert.Equal(t, file.Purpose, "fine-tune")
})
t.Run("ListFilesEndpoint without purpose parameter", func(t *testing.T) {
+ t.Cleanup(tearDown())
resp, err := CallListFilesEndpoint(t, app, "")
assert.NoError(t, err)
assert.Equal(t, 200, resp.StatusCode)
listFiles := responseToListFile(t, resp)
- if len(listFiles.Data) != len(uploadedFiles) {
- t.Errorf("Expected %v files, got %v files", len(uploadedFiles), len(listFiles.Data))
+ if len(listFiles.Data) != len(UploadedFiles) {
+ t.Errorf("Expected %v files, got %v files", len(UploadedFiles), len(listFiles.Data))
}
})
t.Run("ListFilesEndpoint with valid purpose parameter", func(t *testing.T) {
+ t.Cleanup(tearDown())
_ = CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
resp, err := CallListFilesEndpoint(t, app, "fine-tune")
@@ -131,6 +141,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
}
})
t.Run("ListFilesEndpoint with invalid query parameter", func(t *testing.T) {
+ t.Cleanup(tearDown())
resp, err := CallListFilesEndpoint(t, app, "not-so-fine-tune")
assert.NoError(t, err)
assert.Equal(t, 200, resp.StatusCode)
@@ -142,6 +153,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
}
})
t.Run("GetFilesContentsEndpoint get file content", func(t *testing.T) {
+ t.Cleanup(tearDown())
req := httptest.NewRequest("GET", "/files", nil)
resp, _ := app.Test(req)
assert.Equal(t, 200, resp.StatusCode)
@@ -175,8 +187,10 @@ func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*htt
}
func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) (*http.Response, error) {
+ testName := strings.Split(t.Name(), "/")[1]
+
// Create a file that exceeds the limit
- file := createTestFile(t, fileName, fileSize, appConfig)
+ file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
// Creating a new HTTP Request
body, writer := newMultipartFile(file.Name(), tag, purpose)
@@ -188,7 +202,8 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
// Create a file that exceeds the limit
- file := createTestFile(t, fileName, fileSize, appConfig)
+ testName := strings.Split(t.Name(), "/")[1]
+ file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
// Creating a new HTTP Request
body, writer := newMultipartFile(file.Name(), tag, purpose)
@@ -199,11 +214,12 @@ func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName,
assert.NoError(t, err)
f := responseToFile(t, resp)
- id := f.ID
- t.Cleanup(func() {
- _, err := CallFilesDeleteEndpoint(t, app, id)
- assert.NoError(t, err)
- })
+ //id := f.ID
+ //t.Cleanup(func() {
+ // _, err := CallFilesDeleteEndpoint(t, app, id)
+ // assert.NoError(t, err)
+ // assert.Empty(t, UploadedFiles)
+ //})
return f
@@ -240,7 +256,8 @@ func createTestFile(t *testing.T, name string, sizeMB int, option *config.Applic
t.Fatalf("Error MKDIR: %v", err)
}
- file, _ := os.Create(name)
+ file, err := os.Create(name)
+ assert.NoError(t, err)
file.WriteString(strings.Repeat("a", sizeMB*1024*1024)) // sizeMB MB File
t.Cleanup(func() {
@@ -280,7 +297,7 @@ func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
if err != nil {
- fmt.Printf("Failed to decode response: %s", err)
+ log.Error().Msgf("Failed to decode response: %s", err)
}
return listFiles
diff --git a/main.go b/main.go
index 400dcb57..651dd1c2 100644
--- a/main.go
+++ b/main.go
@@ -149,6 +149,12 @@ func main() {
EnvVars: []string{"UPLOAD_PATH"},
Value: "/tmp/localai/upload",
},
+ &cli.StringFlag{
+ Name: "config-path",
+ Usage: "Path to store configuration state used by the API (e.g. uploaded-file and assistant metadata)",
+ EnvVars: []string{"CONFIG_PATH"},
+ Value: "/tmp/localai/config",
+ },
&cli.StringFlag{
Name: "backend-assets-path",
Usage: "Path used to extract libraries that are required by some of the backends in runtime.",
@@ -241,6 +247,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
config.WithImageDir(ctx.String("image-path")),
config.WithAudioDir(ctx.String("audio-path")),
config.WithUploadDir(ctx.String("upload-path")),
+ config.WithConfigsDir(ctx.String("config-path")),
config.WithF16(ctx.Bool("f16")),
config.WithStringGalleries(ctx.String("galleries")),
config.WithModelLibraryURL(ctx.String("remote-library")),
diff --git a/pkg/utils/config.go b/pkg/utils/config.go
new file mode 100644
index 00000000..a9167ed3
--- /dev/null
+++ b/pkg/utils/config.go
@@ -0,0 +1,42 @@
+package utils
+
+import (
+ "encoding/json"
+ "github.com/rs/zerolog/log"
+ "os"
+ "path/filepath"
+)
+
+func SaveConfig(filePath, fileName string, obj any) {
+ file, err := json.MarshalIndent(obj, "", " ")
+ if err != nil {
+ log.Error().Msgf("Failed to JSON marshal the configuration object: %s", err)
+ return
+ }
+
+ absolutePath := filepath.Join(filePath, fileName)
+ err = os.WriteFile(absolutePath, file, 0644)
+ if err != nil {
+ log.Error().Msgf("Failed to save configuration file to %s: %s", absolutePath, err)
+ }
+}
+
+func LoadConfig(filePath, fileName string, obj any) {
+ configFilePath := filepath.Join(filePath, fileName)
+
+ _, err := os.Stat(configFilePath)
+ if os.IsNotExist(err) {
+ log.Debug().Msgf("No configuration file found at %s", configFilePath)
+ return
+ }
+
+ file, err := os.ReadFile(configFilePath)
+ if err != nil {
+ log.Error().Msgf("Failed to read file: %s", err)
+ } else {
+ err = json.Unmarshal(file, &obj)
+ if err != nil {
+ log.Error().Msgf("Failed to JSON unmarshal the file %s: %v", configFilePath, err)
+ }
+ }
+}
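With `SaveConfig`/`LoadConfig` extracted into `pkg/utils`, the upload-specific persistence removed above becomes reusable by the assistants endpoints as well. A minimal round-trip sketch; `Record` is a hypothetical stand-in for the `File`/`Assistant` slices the endpoints actually persist:

```go
package main

import (
	"fmt"
	"os"

	"github.com/go-skynet/LocalAI/pkg/utils"
)

// Record is a placeholder type; anything JSON-serializable works.
type Record struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}

func main() {
	dir := "/tmp/localai/config" // the new --config-path default
	_ = os.MkdirAll(dir, 0755)   // SaveConfig does not create the directory itself

	utils.SaveConfig(dir, "records.json", []Record{{ID: "1", Name: "example"}})

	var loaded []Record
	utils.LoadConfig(dir, "records.json", &loaded)
	fmt.Println(loaded) // [{1 example}]
}
```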
From 607586e0b7b26a4d4c5cf5e4830ac60eb2520540 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 26 Mar 2024 22:56:02 +0100
Subject: [PATCH 0045/2750] fix: downgrade torch (#1902)
Signed-off-by: Ettore Di Giacinto
---
.../python/common-env/transformers/transformers-nvidia.yml | 4 ++--
backend/python/common-env/transformers/transformers.yml | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml
index 55361234..e8d8155b 100644
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -89,8 +89,8 @@ dependencies:
- six==1.16.0
- sympy==1.12
- tokenizers
- - torch==2.2.1
- - torchvision==0.17.1
+ - torch==2.1.2
+ - torchvision==0.16.2
- torchaudio==2.1.2
- tqdm==4.66.1
- triton==2.1.0
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index bdf8c36f..be378f67 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -76,8 +76,8 @@ dependencies:
- six==1.16.0
- sympy==1.12
- tokenizers
- - torch==2.2.1
- - torchvision==0.17.1
+ - torch==2.1.2
+ - torchvision==0.16.2
- torchaudio==2.1.2
- tqdm==4.66.1
- triton==2.1.0
From d3c283ac19e76ac6c87f6d5c9aa04fb9b43f7371 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 26 Mar 2024 22:56:42 +0100
Subject: [PATCH 0046/2750] :arrow_up: Update docs version mudler/LocalAI
(#1903)
Signed-off-by: GitHub
Co-authored-by: mudler
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 20ca21c5..b6372479 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.10.1"
+ "version": "v2.11.0"
}
From b500ceaf735b1678516774bb26a0ddae406e2c23 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 27 Mar 2024 00:21:54 +0100
Subject: [PATCH 0047/2750] :arrow_up: Update ggerganov/llama.cpp (#1904)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 518287da..bd07eac7 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=b06c16ef9f81d84da520232c125d4d8a1d273736
+CPPLLAMA_VERSION?=557410b8f06380560155ac7fcb8316d71ddc9837
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From e7cbe32601bed0748bd163f91dbf51e3e3704c7e Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Wed, 27 Mar 2024 00:31:43 +0100
Subject: [PATCH 0048/2750] feat: Openvino runtime for transformer backend and
streaming support for Openvino and CUDA (#1892)
* fixes #1775 and #1774
Add BitsAndBytes Quantization and fixes embedding on CUDA devices
* Manage 4bit and 8 bit quantization
Manage different BitsAndBytes options with the quantization: parameter in yaml
* fix compilation errors on non CUDA environment
* OpenVINO draft
First draft of OpenVINO integration in transformer backend
* first working implementation
* Streaming working
* Small fix for regression on CUDA and XPU
* use pip version of optimum[openvino]
* Update backend/python/transformers/transformers_server.py
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
.../common-env/transformers/transformers.yml | 7 +-
.../transformers/transformers_server.py | 101 +++++++++++++++---
2 files changed, 90 insertions(+), 18 deletions(-)
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index be378f67..3b3b8fe7 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -34,6 +34,7 @@ dependencies:
- boto3==1.28.61
- botocore==1.31.61
- certifi==2023.7.22
+ - coloredlogs==15.0.1
- TTS==0.22.0
- charset-normalizer==3.3.0
- datasets==2.14.5
@@ -48,6 +49,7 @@ dependencies:
- funcy==2.0
- grpcio==1.59.0
- huggingface-hub
+ - humanfriendly==10.0
- idna==3.4
- jinja2==3.1.2
- jmespath==1.0.1
@@ -57,7 +59,10 @@ dependencies:
- multiprocess==0.70.15
- networkx
- numpy==1.26.0
- - optimum==1.17.1
+ - onnx==1.15.0
+ - openvino==2024.0.0
+ - openvino-telemetry==2023.2.1
+ - optimum[openvino]==1.17.1
- packaging==23.2
- pandas
- peft==0.5.0
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 264e7fad..a8702021 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -8,6 +8,7 @@ import argparse
import signal
import sys
import os
+from threading import Thread
import time
import backend_pb2
@@ -17,13 +18,16 @@ import grpc
import torch
import torch.cuda
+
XPU=os.environ.get("XPU", "0") == "1"
if XPU:
import intel_extension_for_pytorch as ipex
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
- from transformers import AutoTokenizer, AutoModel, set_seed
+ from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer
+ from optimum.intel.openvino import OVModelForCausalLM
+ from openvino.runtime import Core
else:
- from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig
+ from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -81,6 +85,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
compute=torch.bfloat16
self.CUDA = request.CUDA
+ self.OV=False
device_map="cpu"
@@ -105,23 +110,55 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
bnb_4bit_compute_dtype = None,
load_in_8bit=True,
)
-
-
+
try:
if request.Type == "AutoModelForCausalLM":
if XPU:
- if quantization == "xpu_4bit":
+ device_map="xpu"
+ compute=torch.float16
+ if request.Quantization == "xpu_4bit":
xpu_4bit = True
- self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
- device_map="xpu", load_in_4bit=xpu_4bit)
+ xpu_8bit = False
+ elif request.Quantization == "xpu_8bit":
+ xpu_4bit = False
+ xpu_8bit = True
+ else:
+ xpu_4bit = False
+ xpu_8bit = False
+ self.model = AutoModelForCausalLM.from_pretrained(model_name,
+ trust_remote_code=request.TrustRemoteCode,
+ use_safetensors=True,
+ device_map=device_map,
+ load_in_4bit=xpu_4bit,
+ load_in_8bit=xpu_8bit,
+ torch_dtype=compute)
else:
- self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute)
+ self.model = AutoModelForCausalLM.from_pretrained(model_name,
+ trust_remote_code=request.TrustRemoteCode,
+ use_safetensors=True,
+ quantization_config=quantization,
+ device_map=device_map,
+ torch_dtype=compute)
+ elif request.Type == "OVModelForCausalLM":
+ if "GPU" in Core().available_devices:
+ device_map="GPU"
+ else:
+ device_map="CPU"
+ self.model = OVModelForCausalLM.from_pretrained(model_name,
+ compile=True,
+ device=device_map)
+ self.OV = True
else:
- self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute)
+ self.model = AutoModel.from_pretrained(model_name,
+ trust_remote_code=request.TrustRemoteCode,
+ use_safetensors=True,
+ quantization_config=quantization,
+ device_map=device_map,
+ torch_dtype=compute)
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
self.XPU = False
- if XPU:
+ if XPU and self.OV == False:
self.XPU = True
try:
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
@@ -130,6 +167,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
print("Not using XPU:", err, file=sys.stderr)
except Exception as err:
+ print("Error:", err, file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service
# Replace this with your desired response
@@ -167,7 +205,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
print("Embeddings:", sentence_embeddings, file=sys.stderr)
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
- def Predict(self, request, context):
+ def Predict(self, request, context, streaming=False):
"""
Generates text based on the given prompt and sampling parameters.
@@ -186,15 +224,42 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Tokens > 0:
max_tokens = request.Tokens
- inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids
+ inputs = self.tokenizer(request.Prompt, return_tensors="pt")
if self.CUDA:
inputs = inputs.to("cuda")
- if XPU:
+ if XPU and self.OV == False:
inputs = inputs.to("xpu")
+ streaming = False
- outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, do_sample=True, pad_token_id=self.tokenizer.eos_token_id)
- generated_text = self.tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0]
-
+ if streaming:
+ streamer=TextIteratorStreamer(self.tokenizer,
+ skip_prompt=True,
+ skip_special_tokens=True)
+ config=dict(inputs,
+ max_new_tokens=max_tokens,
+ temperature=request.Temperature,
+ top_p=request.TopP,
+ top_k=request.TopK,
+ do_sample=True,
+ attention_mask=inputs["attention_mask"],
+ eos_token_id=self.tokenizer.eos_token_id,
+ pad_token_id=self.tokenizer.eos_token_id,
+ streamer=streamer)
+ thread=Thread(target=self.model.generate, kwargs=config)
+ thread.start()
+ generated_text = ""
+ for new_text in streamer:
+ generated_text += new_text
+ yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8'))
+ else:
+ outputs = self.model.generate(inputs["input_ids"],
+ max_new_tokens=max_tokens,
+ temperature=request.Temperature,
+ top_p=request.TopP,
+ top_k=request.TopK,
+ do_sample=True,
+ pad_token=self.tokenizer.eos_token_id)
+ generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
def PredictStream(self, request, context):
@@ -208,7 +273,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Returns:
backend_pb2.Result: The predict stream result.
"""
- yield self.Predict(request, context)
+ iterations = self.Predict(request, context, streaming=True)
+ for iteration in iterations:
+ yield iteration
def serve(address):
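The streaming path above runs `model.generate` on a worker thread and drains a `TextIteratorStreamer`, yielding each token back over gRPC as it arrives. For readers more at home on the Go side of LocalAI, the same producer/consumer handoff looks like this; a sketch of the pattern, not code from the repo:

```go
package main

import (
	"fmt"
	"time"
)

// produceTokens plays the role of model.generate on its worker thread:
// it pushes partial results into a channel and closes it when done.
func produceTokens(out chan<- string) {
	defer close(out)
	for _, tok := range []string{"Hello", ", ", "world", "!"} {
		time.Sleep(10 * time.Millisecond) // simulate generation latency
		out <- tok
	}
}

func main() {
	tokens := make(chan string)
	go produceTokens(tokens) // generation runs concurrently...

	// ...while the consumer streams tokens out as they arrive, the role
	// PredictStream plays when it yields one Reply per token.
	for tok := range tokens {
		fmt.Print(tok)
	}
	fmt.Println()
}
```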
From 8210ffcb6c721ed4931d3f4b0bf52e787c52d7d0 Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Wed, 27 Mar 2024 17:50:35 +0100
Subject: [PATCH 0049/2750] feat: Token Stream support for Transformer, fix:
missing package for OpenVINO (#1908)
* Streaming working
* Small fix for regression on CUDA and XPU
* use pip version of optimum[openvino]
* Update backend/python/transformers/transformers_server.py
Signed-off-by: Ettore Di Giacinto
* Token streaming support
fix optimum[openvino] package in install.sh
* Token Streaming support
---------
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
.../python/common-env/transformers/install.sh | 2 +-
.../transformers/transformers_server.py | 118 +++++++++++-------
2 files changed, 72 insertions(+), 48 deletions(-)
diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index e268fcc8..8502adde 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then
# Intel GPU: If the directory exists, we assume we are using the intel image
# (no conda env)
# https://github.com/intel/intel-extension-for-pytorch/issues/538
- pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
+ pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
fi
if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index a8702021..04324d9b 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -9,6 +9,7 @@ import signal
import sys
import os
from threading import Thread
+import asyncio
import time
import backend_pb2
@@ -205,17 +206,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
print("Embeddings:", sentence_embeddings, file=sys.stderr)
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
- def Predict(self, request, context, streaming=False):
- """
- Generates text based on the given prompt and sampling parameters.
-
- Args:
- request: The predict request.
- context: The gRPC context.
-
- Returns:
- backend_pb2.Reply: The predict result.
- """
+ async def _predict(self, request, context, streaming=False):
set_seed(request.Seed)
if request.TopP == 0:
request.TopP = 0.9
@@ -248,21 +239,54 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
thread=Thread(target=self.model.generate, kwargs=config)
thread.start()
generated_text = ""
- for new_text in streamer:
- generated_text += new_text
- yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8'))
+ try:
+ for new_text in streamer:
+ generated_text += new_text
+ yield backend_pb2.Reply(message=bytes(new_text, encoding='utf-8'))
+ finally:
+ thread.join()
else:
- outputs = self.model.generate(inputs["input_ids"],
- max_new_tokens=max_tokens,
- temperature=request.Temperature,
- top_p=request.TopP,
- top_k=request.TopK,
- do_sample=True,
- pad_token=self.tokenizer.eos_token_id)
+ if XPU and self.OV == False:
+ outputs = self.model.generate(inputs["input_ids"],
+ max_new_tokens=max_tokens,
+ temperature=request.Temperature,
+ top_p=request.TopP,
+ top_k=request.TopK,
+ do_sample=True,
+ pad_token=self.tokenizer.eos_token_id)
+ else:
+ outputs = self.model.generate(inputs["input_ids"],
+ max_new_tokens=max_tokens,
+ temperature=request.Temperature,
+ top_p=request.TopP,
+ top_k=request.TopK,
+ do_sample=True,
+ attention_mask=inputs["attention_mask"],
+ eos_token_id=self.tokenizer.eos_token_id,
+ pad_token_id=self.tokenizer.eos_token_id)
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
- return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
- def PredictStream(self, request, context):
+ if streaming:
+ return
+
+ yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
+
+ async def Predict(self, request, context):
+ """
+ Generates text based on the given prompt and sampling parameters.
+
+ Args:
+ request: The predict request.
+ context: The gRPC context.
+
+ Returns:
+ backend_pb2.Reply: The predict result.
+ """
+ gen = self._predict(request, context, streaming=False)
+ res = await gen.__anext__()
+ return res
+
+ async def PredictStream(self, request, context):
"""
Generates text based on the given prompt and sampling parameters, and streams the results.
@@ -273,33 +297,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Returns:
backend_pb2.Result: The predict stream result.
"""
- iterations = self.Predict(request, context, streaming=True)
- for iteration in iterations:
- yield iteration
+ iterations = self._predict(request, context, streaming=True)
+ try:
+ async for iteration in iterations:
+ yield iteration
+ finally:
+ await iterations.aclose()
-
-def serve(address):
- server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+async def serve(address):
+ # Start asyncio gRPC server
+ server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+ # Add the servicer to the server
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+ # Bind the server to the address
server.add_insecure_port(address)
- server.start()
+
+ # Gracefully shutdown the server on SIGTERM or SIGINT
+ loop = asyncio.get_event_loop()
+ for sig in (signal.SIGINT, signal.SIGTERM):
+ loop.add_signal_handler(
+ sig, lambda: asyncio.ensure_future(server.stop(5))
+ )
+
+ # Start the server
+ await server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
-
- # Define the signal handler function
- def signal_handler(sig, frame):
- print("Received termination signal. Shutting down...")
- server.stop(0)
- sys.exit(0)
-
- # Set the signal handlers for SIGINT and SIGTERM
- signal.signal(signal.SIGINT, signal_handler)
- signal.signal(signal.SIGTERM, signal_handler)
-
- try:
- while True:
- time.sleep(_ONE_DAY_IN_SECONDS)
- except KeyboardInterrupt:
- server.stop(0)
+ # Wait for the server to be terminated
+ await server.wait_for_termination()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
@@ -308,4 +332,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
- serve(args.addr)
+ asyncio.run(serve(args.addr))
\ No newline at end of file
From 93f0b7ae03ec0a92375616fda62ca3a0ebb075e9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 27 Mar 2024 18:17:12 +0100
Subject: [PATCH 0050/2750] update hot topics
Signed-off-by: Ettore Di Giacinto
---
README.md | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 8cf15d5a..5f1bfe0b 100644
--- a/README.md
+++ b/README.md
@@ -50,14 +50,12 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
-- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
+- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
-- Tools API support: https://github.com/mudler/LocalAI/pull/1715
-- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
-- ROCm container images: https://github.com/mudler/LocalAI/pull/1595
-- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
+- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
From 66ee4afb952d085f469ce5e47f803746d010c285 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 27 Mar 2024 21:10:58 +0100
Subject: [PATCH 0051/2750] feat(welcome): add simple welcome page (#1912)
* feat(welcome): add simple welcome page
* feat(api): add 404 handling
---
core/config/application_config.go | 5 +++
core/http/api.go | 42 +++++++++++++++++++-
core/http/views/404.html | 33 ++++++++++++++++
core/http/views/index.html | 58 ++++++++++++++++++++++++++++
core/http/views/partials/footer.html | 4 ++
core/http/views/partials/head.html | 13 +++++++
core/http/views/partials/navbar.html | 15 +++++++
go.mod | 3 ++
go.sum | 6 +++
main.go | 11 ++++++
10 files changed, 189 insertions(+), 1 deletion(-)
create mode 100644 core/http/views/404.html
create mode 100644 core/http/views/index.html
create mode 100644 core/http/views/partials/footer.html
create mode 100644 core/http/views/partials/head.html
create mode 100644 core/http/views/partials/navbar.html
diff --git a/core/config/application_config.go b/core/config/application_config.go
index c2d4e13a..49b35f97 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -15,6 +15,7 @@ type ApplicationConfig struct {
ConfigFile string
ModelPath string
UploadLimitMB, Threads, ContextSize int
+ DisableWelcomePage bool
F16 bool
Debug, DisableMessage bool
ImageDir string
@@ -105,6 +106,10 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
o.WatchDogBusy = true
}
+var DisableWelcomePage = func(o *ApplicationConfig) {
+ o.DisableWelcomePage = true
+}
+
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.WatchDogBusyTimeout = t
diff --git a/core/http/api.go b/core/http/api.go
index de0a4939..365407d8 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -1,12 +1,15 @@
package http
import (
+ "embed"
"encoding/json"
"errors"
- "github.com/go-skynet/LocalAI/pkg/utils"
+ "net/http"
"os"
"strings"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
@@ -21,6 +24,7 @@ import (
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
+ "github.com/gofiber/template/html/v2"
)
func readAuthHeader(c *fiber.Ctx) string {
@@ -41,9 +45,14 @@ func readAuthHeader(c *fiber.Ctx) string {
return authHeader
}
+//go:embed views/*
+var viewsfs embed.FS
+
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
+ engine := html.NewFileSystem(http.FS(viewsfs), ".html")
// Return errors as JSON responses
app := fiber.New(fiber.Config{
+ Views: engine,
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: appConfig.DisableMessage,
// Override default error handler
@@ -168,6 +177,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+ if !appConfig.DisableWelcomePage {
+ models, _ := ml.ListModels()
+ backendConfigs := cl.GetAllBackendConfigs()
+ app.Get("/", auth, func(c *fiber.Ctx) error {
+ // Render index
+ return c.Render("views/index", fiber.Map{
+ "Title": "LocalAI API - " + internal.PrintableVersion(),
+ "Version": internal.PrintableVersion(),
+ "Models": models,
+ "ModelsConfig": backendConfigs,
+ "ApplicationConfig": appConfig,
+ })
+ })
+ }
+
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
@@ -275,5 +299,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
+ // Define a custom 404 handler
+ app.Use(func(c *fiber.Ctx) error {
+
+ // Check if the request accepts JSON
+ if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
+ // The client expects a JSON response
+ c.Status(fiber.StatusNotFound).JSON(fiber.Map{
+ "error": "Resource not found",
+ })
+ } else {
+ // The client expects an HTML response
+ c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
+ }
+ return nil
+ })
+
return app, nil
}
diff --git a/core/http/views/404.html b/core/http/views/404.html
new file mode 100644
index 00000000..359d8505
--- /dev/null
+++ b/core/http/views/404.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html>
+{{template "views/partials/head" .}}
+<body>
+ {{template "views/partials/navbar" .}}
+ <main>
+ <h1>Nothing found!</h1>
+ </main>
+ {{template "views/partials/footer" .}}
+</body>
+</html>
diff --git a/core/http/views/index.html b/core/http/views/index.html
new file mode 100644
index 00000000..ad14f667
--- /dev/null
+++ b/core/http/views/index.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <title>{{.Title}}</title>
+</head>
+<body>
+ {{template "views/partials/navbar" .}}
+ <main>
+ <h2>Installed models</h2>
+ <p>We have {{len .ModelsConfig}} pre-loaded models available.</p>
+ <ul>
+ {{ range .ModelsConfig }}
+ <li>
+ {{.Name}}
+ {{ if .Usage }}
+ {{.Usage}}
+ {{ end }}
+ {{ if .Description }}
+ {{.Description}}
+ {{ end }}
+ </li>
+ {{ end }}
+ </ul>
+ </main>
+ {{template "views/partials/footer" .}}
+</body>
+</html>
diff --git a/core/http/views/partials/footer.html b/core/http/views/partials/footer.html
new file mode 100644
index 00000000..7fc7e504
--- /dev/null
+++ b/core/http/views/partials/footer.html
@@ -0,0 +1,4 @@
+<footer>
+ <p>LocalAI Version {{.Version}}</p>
+ <p>LocalAI © 2023-2024 Ettore Di Giacinto</p>
+</footer>
\ No newline at end of file
diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html
new file mode 100644
index 00000000..59cdea33
--- /dev/null
+++ b/core/http/views/partials/head.html
@@ -0,0 +1,5 @@
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>{{.Title}}</title>
+</head>
\ No newline at end of file
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
new file mode 100644
index 00000000..2717f974
--- /dev/null
+++ b/core/http/views/partials/navbar.html
@@ -0,0 +1,3 @@
+<nav>
+ <a href="/">LocalAI</a>
+</nav>
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 8a43df1d..79068904 100644
--- a/go.mod
+++ b/go.mod
@@ -75,6 +75,9 @@ require (
github.com/docker/go-units v0.4.0 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/gofiber/template v1.8.3 // indirect
+ github.com/gofiber/template/html/v2 v2.1.1 // indirect
+ github.com/gofiber/utils v1.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.2 // indirect
diff --git a/go.sum b/go.sum
index bef84d57..a2c5b912 100644
--- a/go.sum
+++ b/go.sum
@@ -96,6 +96,12 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
+github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
+github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8=
+github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8=
+github.com/gofiber/template/html/v2 v2.1.1/go.mod h1:2G0GHHOUx70C1LDncoBpe4T6maQbNa4x1CVNFW0wju0=
+github.com/gofiber/utils v1.1.0 h1:vdEBpn7AzIUJRhe+CiTOJdUcTg4Q9RK+pEa0KPbLdrM=
+github.com/gofiber/utils v1.1.0/go.mod h1:poZpsnhBykfnY1Mc0KeEa6mSHrS3dV0+oBWyeQmb2e0=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
diff --git a/main.go b/main.go
index 651dd1c2..f000aa71 100644
--- a/main.go
+++ b/main.go
@@ -189,6 +189,12 @@ func main() {
EnvVars: []string{"WATCHDOG_IDLE"},
Value: false,
},
+ &cli.BoolFlag{
+ Name: "disable-welcome",
+ Usage: "Disable welcome pages",
+ EnvVars: []string{"DISABLE_WELCOME"},
+ Value: false,
+ },
&cli.BoolFlag{
Name: "enable-watchdog-busy",
Usage: "Enable watchdog for stopping busy backends that exceed a defined threshold.",
@@ -264,6 +270,11 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
idleWatchDog := ctx.Bool("enable-watchdog-idle")
busyWatchDog := ctx.Bool("enable-watchdog-busy")
+
+ if ctx.Bool("disable-welcome") {
+ opts = append(opts, config.DisableWelcomePage)
+ }
+
if idleWatchDog || busyWatchDog {
opts = append(opts, config.EnableWatchDog)
if idleWatchDog {
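The welcome-page patch above ties together three pieces: views embedded in the binary via `embed.FS`, fiber's HTML template engine, and a catch-all handler that returns JSON to API clients and HTML to browsers. A stripped-down, self-contained sketch of the same wiring, assuming a local `views/` directory containing `index.html` and `404.html`:

```go
package main

import (
	"embed"
	"log"
	"net/http"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/template/html/v2"
)

//go:embed views/*
var viewsfs embed.FS

func main() {
	// Serve templates from the filesystem compiled into the binary.
	engine := html.NewFileSystem(http.FS(viewsfs), ".html")
	app := fiber.New(fiber.Config{Views: engine})

	app.Get("/", func(c *fiber.Ctx) error {
		return c.Render("views/index", fiber.Map{"Title": "Demo"})
	})

	// Catch-all 404: registered last, so it only runs when nothing matched.
	app.Use(func(c *fiber.Ctx) error {
		if len(c.Accepts("html")) == 0 {
			return c.Status(fiber.StatusNotFound).JSON(fiber.Map{"error": "Resource not found"})
		}
		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
	})

	log.Fatal(app.Listen(":8080"))
}
```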
From 7ef5f3b473fdbbcf8d89cc942190a03a92c064c6 Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Wed, 27 Mar 2024 21:12:04 +0100
Subject: [PATCH 0052/2750] :arrow_up: Update M0Rf30/go-tiny-dream (#1911)
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index bd07eac7..7334ed4f 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
# tinydream version
-TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a
+TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
From 0c0efc871c9673efa733eccc880b65f987006051 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Wed, 27 Mar 2024 15:12:19 -0500
Subject: [PATCH 0053/2750] fix(build): better CI logging and correct some
build failure modes in Makefile (#1899)
* feat: group make output by target when running parallelized builds in CI
* fix: quote GO_TAGS in makefile to fix handling of whitespace in value
* fix: set CPATH to find opencv2 in its commonly installed location
* fix: add missing go mod dropreplace for go-llama.cpp
* chore: remove opencv symlink from github workflows
---
.github/workflows/image-pr.yml | 4 ++--
.github/workflows/image.yml | 4 ++--
.github/workflows/release.yaml | 1 -
.github/workflows/test-extra.yml | 16 ++++++++--------
.github/workflows/test.yml | 2 +-
Makefile | 7 ++++---
6 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 17456617..aa59188c 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,7 +22,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
- makeflags: "-j3"
+ makeflags: "--jobs=3 --output-sync=target"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -81,7 +81,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
- makeflags: "-j3"
+ makeflags: "--jobs=3 --output-sync=target"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 6e93cb9a..40deb0ec 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -27,7 +27,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
aio: ${{ matrix.aio }}
- makeflags: "-j3"
+ makeflags: "--jobs=3 --output-sync=target"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -207,7 +207,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
- makeflags: "-j3"
+ makeflags: "--jobs=3 --output-sync=target"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index aa0a270b..6ac816ee 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -102,7 +102,6 @@ jobs:
- name: Dependencies
run: |
sudo apt-get install -y --no-install-recommends libopencv-dev
- sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
- name: Build stablediffusion
run: |
make backend-assets/grpc/stablediffusion
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 68da2c56..5f61835d 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -33,7 +33,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
@@ -62,7 +62,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
@@ -91,7 +91,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
@@ -121,7 +121,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
@@ -152,7 +152,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
@@ -223,7 +223,7 @@ jobs:
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
- # sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ # sudo apt-get install -y libopencv-dev
# sudo rm -rfv /usr/bin/conda || true
@@ -255,7 +255,7 @@ jobs:
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
- # sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ # sudo apt-get install -y libopencv-dev
# sudo rm -rfv /usr/bin/conda || true
# - name: Test vllm
# run: |
@@ -281,7 +281,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
- name: Test vall-e-x
run: |
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6d837821..203aeeca 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -75,7 +75,7 @@ jobs:
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+ sudo apt-get install -y libopencv-dev
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
diff --git a/Makefile b/Makefile
index 7334ed4f..da91fb2d 100644
--- a/Makefile
+++ b/Makefile
@@ -224,7 +224,7 @@ sources/go-stable-diffusion:
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
- $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+ CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
@@ -263,6 +263,7 @@ dropreplace:
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
+ $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
@@ -531,7 +532,7 @@ backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-asse
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
@@ -556,7 +557,7 @@ docker:
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
- --build-arg GO_TAGS=$(GO_TAGS) \
+ --build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .
From 160eb48b2b2aa74f0c30046da483cfd7cd356dc2 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 27 Mar 2024 22:47:59 +0100
Subject: [PATCH 0054/2750] Update quickstart.md
---
docs/content/docs/getting-started/quickstart.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 33ec4cfa..94500655 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -50,8 +50,8 @@ Start the image with Docker:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
# For Nvidia GPUs:
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
```
From 2266d8263c5beb8e4386de5c72a98c5d9f348e35 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 27 Mar 2024 22:48:46 +0100
Subject: [PATCH 0055/2750] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 5f1bfe0b..3c5c1a52 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# or, if you have an Nvidia GPU:
-# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-cuda12
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
```
## 🚀 [Features](https://localai.io/features/)
From 07c4bdda7c786c382950d7d3ae5982eccfbccb9a Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 27 Mar 2024 22:57:59 +0100
Subject: [PATCH 0056/2750] :arrow_up: Update ggerganov/llama.cpp (#1913)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index da91fb2d..e61fa6d6 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=557410b8f06380560155ac7fcb8316d71ddc9837
+CPPLLAMA_VERSION?=a016026a3ac16d8c9b993a3573f19b9556d67de4
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 07c49ee4b870760fad81e38de6f3be4d775532a4 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 27 Mar 2024 23:53:13 +0100
Subject: [PATCH 0057/2750] :arrow_up: Update ggerganov/whisper.cpp (#1914)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index e61fa6d6..3258bbe3 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=1558ec5a16cb2b2a0bf54815df1d41f83dc3815b
+WHISPER_CPP_VERSION?=2948c740a2bf43190b8e3badb6f1e147f11f96d1
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 23b833d1715677b0f6388f80f8fa1e0c61b64488 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 28 Mar 2024 12:42:37 +0100
Subject: [PATCH 0058/2750] Update run-other-models.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/getting-started/run-other-models.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/content/docs/getting-started/run-other-models.md b/docs/content/docs/getting-started/run-other-models.md
index 4420550d..2b72b93d 100644
--- a/docs/content/docs/getting-started/run-other-models.md
+++ b/docs/content/docs/getting-started/run-other-models.md
@@ -80,7 +80,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
{{% /tab %}}
@@ -111,7 +111,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
-| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
+| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
{{% /tab %}}
{{< /tabs >}}
@@ -123,4 +123,4 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
```
-{{% /alert %}}
\ No newline at end of file
+{{% /alert %}}
From 13ccd2afef1f00f2579fb7247d52686f5d06f5db Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 28 Mar 2024 18:16:58 +0100
Subject: [PATCH 0059/2750] docs(aio-usage): update docs to show examples
(#1921)
Signed-off-by: Ettore Di Giacinto
---
.../docs/getting-started/quickstart.md | 204 +++++++++++++++++-
docs/content/docs/reference/aio-images.md | 36 ++--
2 files changed, 222 insertions(+), 18 deletions(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 94500655..ff05afaf 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -68,8 +68,8 @@ services:
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
- timeout: 120m
- retries: 120
+ timeout: 20m
+ retries: 5
ports:
- 8080:8080
environment:
@@ -89,8 +89,208 @@ services:
For a list of all the container images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about the All-in-one images, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}).
+{{% alert icon="💡 Models caching" %}}
+
+The **AIO** image will download the needed models on the first run if they are not already present, and store them in `/build/models` inside the container. The AIO models are updated automatically with new versions of the AIO images.
+
+You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).
+
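+For example, to keep models under a different path inside the container (a minimal sketch; `MODELS_PATH` is used as described above, the host directory is an assumption):
+
+```bash
+# store models in /models inside the container instead of /build/models (illustrative paths)
+docker run -p 8080:8080 --name local-ai -ti \
+  -e MODELS_PATH=/models -v $PWD/models:/models \
+  localai/localai:latest-aio-cpu
+```
+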
+If you want to use models from a local directory, you can mount it as a volume at `/build/models`:
+
+```bash
+docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu
+```
+
+or use a named volume:
+
+```bash
+docker create volume localai-models
+docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
+```
+
+{{% /alert %}}
+
+## Try it out
+
+LocalAI does not ship a web UI by default, but you can use third-party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}})). You can, however, test the API endpoints using `curl`.
+
+### Text Generation
+
+Creates a model response for the given chat conversation. [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create).
+
+
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+  -d '{ "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1 }'
+```
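+
+Token streaming can be requested with the OpenAI-style `stream` flag (a sketch; assumes the configured backend supports streaming responses):
+
+```bash
+# same request as above, but streamed as server-sent events
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{ "model": "gpt-4", "stream": true, "messages": [{"role": "user", "content": "How are you doing?"}] }'
+```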
+
+
+
+### GPT Vision
+
+Understands images.
+
+
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4-vision-preview",
+ "messages": [
+ {
+ "role": "user", "content": [
+ {"type":"text", "text": "What is in the image?"},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ }
+ }
+        ]
+      }
+    ],
+    "temperature": 0.9
+ }'
+```
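+
+Local images can also be embedded as base64 data URIs in the `image_url` field (an assumption based on the OpenAI API convention; verify against your LocalAI version):
+
+```bash
+# encode a local file and send it inline; base64 -w0 is GNU coreutils (use `base64 -i` on macOS)
+IMAGE_B64=$(base64 -w0 image.jpg)
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{ "model": "gpt-4-vision-preview", "messages": [{ "role": "user", "content": [
+        {"type": "text", "text": "What is in the image?"},
+        {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,'"$IMAGE_B64"'"}}
+      ]}]}'
+```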
+
+
+
+### Function calling
+
+Calls functions using the OpenAI-compatible `tools` interface.
+
+
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the weather like in Boston?"
+ }
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location"]
+ }
+ }
+ }
+ ],
+ "tool_choice": "auto"
+ }'
+```
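+
+If the model decides to call a function, the response should carry a `tool_calls` array in the OpenAI response schema (assumed response shape; `request.json` is a hypothetical file holding the payload above):
+
+```bash
+# extract the proposed tool call(s) from the response with jq
+curl -s http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d @request.json | jq '.choices[0].message.tool_calls'
+```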
+
+
+
+### Image Generation
+
+Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).
+
+
+
+```bash
+curl http://localhost:8080/v1/images/generations \
+ -H "Content-Type: application/json" -d '{
+ "prompt": "A cute baby sea otter",
+ "size": "256x256"
+ }'
+```
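+
+Assuming the response follows the OpenAI images schema, the URL of the generated image can be extracted with `jq` (illustrative only):
+
+```bash
+# print only the image URL from the generation response
+curl -s http://localhost:8080/v1/images/generations \
+  -H "Content-Type: application/json" -d '{
+    "prompt": "A cute baby sea otter",
+    "size": "256x256"
+  }' | jq -r '.data[0].url'
+```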
+
+
+
+### Text to speech
+
+
+Generates audio from the input text. [OpenAI documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech).
+
+
+
+```bash
+curl http://localhost:8080/v1/audio/speech \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "tts-1",
+ "input": "The quick brown fox jumped over the lazy dog.",
+ "voice": "alloy"
+ }' \
+ --output speech.mp3
+```
+
+
+
+
+### Audio Transcription
+
+Transcribes audio into the input language. [OpenAI Documentation](https://platform.openai.com/docs/api-reference/audio/createTranscription).
+
+
+
+First, download a sample audio file to transcribe:
+
+```bash
+wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+```
+
+Send the example audio file to the transcriptions endpoint:
+```bash
+curl http://localhost:8080/v1/audio/transcriptions \
+ -H "Content-Type: multipart/form-data" \
+ -F file="@$PWD/gb1.ogg" -F model="whisper-1"
+```
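+
+Assuming a Whisper-style JSON response, the transcribed text can be pulled out directly (a sketch using `jq`):
+
+```bash
+# print only the transcription text
+curl -s http://localhost:8080/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@$PWD/gb1.ogg" -F model="whisper-1" | jq -r '.text'
+```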
+
+
+
+### Embeddings Generation
+
+Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. [OpenAI Embeddings](https://platform.openai.com/docs/api-reference/embeddings).
+
+
+
+```bash
+curl http://localhost:8080/embeddings \
+ -X POST -H "Content-Type: application/json" \
+ -d '{
+ "input": "Your text string goes here",
+ "model": "text-embedding-ada-002"
+ }'
+```
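+
+Assuming the response follows the OpenAI embeddings schema, you can verify the vector dimensionality with `jq` (illustrative only):
+
+```bash
+# print the length of the returned embedding vector
+curl -s http://localhost:8080/embeddings \
+  -X POST -H "Content-Type: application/json" \
+  -d '{ "input": "Your text string goes here", "model": "text-embedding-ada-002" }' \
+  | jq '.data[0].embedding | length'
+```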
+
+
+
+{{% alert icon="💡" %}}
+
+Don't use the model file name as `model` in the request unless you want to handle the prompt template yourself.
+
+Use model names as you would with OpenAI, as in the examples above. For instance, `gpt-4-vision-preview` or `gpt-4`.
+
+{{% /alert %}}
+
## What's next?
+There is much more to explore! You can run any model from Hugging Face, generate video, and clone voices with LocalAI. Check out the [features]({{%relref "docs/features" %}}) section for a full overview.
+
Explore further resources and community contributions:
- [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}})
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 331892e9..c2cb57ba 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -7,15 +7,28 @@ weight = 26
All-In-One images come pre-configured with a set of models and backends to fully leverage almost all of the LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
-What you can find configured out of the box:
+In the AIO images, models are configured under the names of OpenAI models; however, they are actually backed by open-source models. The mapping is shown in the table below:
-- Image generation
-- Text generation
-- Text to audio
-- Audio transcription
-- Embeddings
-- GPT Vision
+| Category | Model name | Real model |
+| --- | --- | --- |
+| Text Generation | `gpt-4` | `phi-2` (CPU) or `hermes-2-pro-mistral` (GPU) |
+| Multimodal | `gpt-4-vision-preview` | `bakllava` (CPU) or `llava-1.6-mistral` (GPU) |
+| Image generation | `stablediffusion` | `stablediffusion` (CPU) or `dreamshaper-8` (GPU) |
+| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model |
+| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` |
+| Embeddings | `text-embedding-ada-002` | |
+## Usage
+
+Select the image (CPU or GPU) and start the container with Docker:
+
+```bash
+# CPU example
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+```
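+
+The same pattern applies to GPU environments; for example, with the Nvidia runtime (image tags as listed in the table below):
+
+```bash
+# GPU example (Nvidia, CUDA 12)
+docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
+```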
+
+LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
+
+## Available images
| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
@@ -37,12 +50,3 @@ The AIO Images are inheriting the same environment variables as the base images
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
-## Example
-
-Start the image with Docker:
-
-```bash
-docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
-```
-
-LocalAI will automatically download all the required models, and will be available at [localhost:8080](http://localhost:8080/v1/models).
From 8477e8fac39641fcb6adda9ae02392ac97bfd4e4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 28 Mar 2024 18:28:30 +0100
Subject: [PATCH 0060/2750] Update quickstart.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/getting-started/quickstart.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index ff05afaf..716fe154 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -89,9 +89,9 @@ services:
For a list of all the container images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about the All-in-one images, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}).
-{{% alert icon="💡 Models caching" %}}
+{{% alert icon="💡" %}}
-The **AIO** image will download the needed models on the first run if they are not already present, and store them in `/build/models` inside the container. The AIO models are updated automatically with new versions of the AIO images.
+**Models caching**: The **AIO** image will download the needed models on the first run if they are not already present, and store them in `/build/models` inside the container. The AIO models are updated automatically with new versions of the AIO images.
You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).
@@ -104,7 +104,7 @@ docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai
or use a named volume:
```bash
-docker create volume localai-models
+docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
```
@@ -298,4 +298,4 @@ Explore further resources and community contributions:
- [Run other models]({{%relref "docs/getting-started/run-other-models" %}})
- [Container images]({{%relref "docs/reference/container-images" %}})
- [All-in-one Images]({{%relref "docs/reference/aio-images" %}})
-- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
\ No newline at end of file
+- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
From 4e79294f978fc411508a37fd650dfd0a78a9df26 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 28 Mar 2024 19:52:40 +0100
Subject: [PATCH 0061/2750] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 3c5c1a52..76a5fc08 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
From bf65ed6eb84d5b856c412d607827dc057c4585d4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 28 Mar 2024 21:52:52 +0100
Subject: [PATCH 0062/2750] feat(webui): add partials, show backends associated
 with models (#1922)
* feat(webui): add partials, show backends associated with models
* fix(auth): put assistant and backend under auth
---
core/http/api.go | 88 ++++++++++++++------------------------
core/http/render.go | 80 ++++++++++++++++++++++++++++++++++
core/http/views/index.html | 56 +++++++++++-------------
go.mod | 10 +++--
go.sum | 34 ++++++---------
5 files changed, 155 insertions(+), 113 deletions(-)
create mode 100644 core/http/render.go
diff --git a/core/http/api.go b/core/http/api.go
index 365407d8..24216737 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -1,10 +1,8 @@
package http
import (
- "embed"
"encoding/json"
"errors"
- "net/http"
"os"
"strings"
@@ -24,7 +22,6 @@ import (
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
- "github.com/gofiber/template/html/v2"
)
func readAuthHeader(c *fiber.Ctx) string {
@@ -45,14 +42,10 @@ func readAuthHeader(c *fiber.Ctx) string {
return authHeader
}
-//go:embed views/*
-var viewsfs embed.FS
-
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
- engine := html.NewFileSystem(http.FS(viewsfs), ".html")
// Return errors as JSON responses
app := fiber.New(fiber.Config{
- Views: engine,
+ Views: renderEngine(),
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: appConfig.DisableMessage,
// Override default error handler
@@ -177,20 +170,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
- if !appConfig.DisableWelcomePage {
- models, _ := ml.ListModels()
- backendConfigs := cl.GetAllBackendConfigs()
- app.Get("/", auth, func(c *fiber.Ctx) error {
- // Render index
- return c.Render("views/index", fiber.Map{
- "Title": "LocalAI API - " + internal.PrintableVersion(),
- "Version": internal.PrintableVersion(),
- "Models": models,
- "ModelsConfig": backendConfigs,
- "ApplicationConfig": appConfig,
- })
- })
- }
+ welcomeRoute(
+ app,
+ cl,
+ ml,
+ appConfig,
+ auth,
+ )
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
@@ -224,24 +210,24 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
// assistant
- app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
@@ -290,30 +276,18 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Experimental Backend Statistics Module
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
- app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
- app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
+ app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
+ app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
// models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
- app.Get("/metrics", localai.LocalAIMetricsEndpoint())
+ app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
// Define a custom 404 handler
- app.Use(func(c *fiber.Ctx) error {
-
- // Check if the request accepts JSON
- if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
- // The client expects a JSON response
- c.Status(fiber.StatusNotFound).JSON(fiber.Map{
- "error": "Resource not found",
- })
- } else {
- // The client expects an HTML response
- c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
- }
- return nil
- })
+ // Note: keep this at the bottom!
+ app.Use(notFoundHandler)
return app, nil
}
diff --git a/core/http/render.go b/core/http/render.go
new file mode 100644
index 00000000..c5045868
--- /dev/null
+++ b/core/http/render.go
@@ -0,0 +1,80 @@
+package http
+
+import (
+ "embed"
+ "fmt"
+ "html/template"
+ "net/http"
+
+ "github.com/Masterminds/sprig/v3"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/internal"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+ fiberhtml "github.com/gofiber/template/html/v2"
+ "github.com/russross/blackfriday"
+)
+
+//go:embed views/*
+var viewsfs embed.FS
+
+func notFoundHandler(c *fiber.Ctx) error {
+ // Check if the request accepts JSON
+ if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
+ // The client expects a JSON response
+ c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{
+ Error: &schema.APIError{Message: "Resource not found", Code: fiber.StatusNotFound},
+ })
+ } else {
+ // The client expects an HTML response
+ c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
+ }
+ return nil
+}
+
+func welcomeRoute(
+ app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ auth func(*fiber.Ctx) error,
+) {
+ if appConfig.DisableWelcomePage {
+ return
+ }
+
+ models, _ := ml.ListModels()
+ backendConfigs := cl.GetAllBackendConfigs()
+
+ app.Get("/", auth, func(c *fiber.Ctx) error {
+ summary := fiber.Map{
+ "Title": "LocalAI API - " + internal.PrintableVersion(),
+ "Version": internal.PrintableVersion(),
+ "Models": models,
+ "ModelsConfig": backendConfigs,
+ "ApplicationConfig": appConfig,
+ }
+
+ if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
+ // The client expects a JSON response
+ return c.Status(fiber.StatusOK).JSON(summary)
+ } else {
+ // Render index
+ return c.Render("views/index", summary)
+ }
+ })
+
+}
+
+func renderEngine() *fiberhtml.Engine {
+ engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html")
+ engine.AddFuncMap(sprig.FuncMap())
+ engine.AddFunc("MDToHTML", markDowner)
+ return engine
+}
+
+func markDowner(args ...interface{}) template.HTML {
+ s := blackfriday.MarkdownCommon([]byte(fmt.Sprintf("%s", args...)))
+ return template.HTML(s)
+}
diff --git a/core/http/views/index.html b/core/http/views/index.html
index ad14f667..287ee1ce 100644
--- a/core/http/views/index.html
+++ b/core/http/views/index.html
@@ -1,56 +1,50 @@
-
-
-
- {{.Title}}
-
-
-
-
-
-
+{{template "views/partials/head" .}}
+
{{template "views/partials/navbar" .}}
-
Installed models
+
Installed models
We have {{len .ModelsConfig}} pre-loaded models available.
-
+
{{ range .ModelsConfig }}
-
- {{.Name}}
- {{ if .Usage }}
- {{.Usage}}
- {{ end }}
- {{ if .Description }}
- {{.Description}}
- {{ end }}
+
+
+
{{.Name}}
+ {{ if .Backend }}
+
+
+ {{.Backend}}
+
+ {{ else }}
+
+ auto
+
+ {{ end }}
+
+
{{ end }}
-
+
{{template "views/partials/footer" .}}
diff --git a/go.mod b/go.mod
index 79068904..0ac0d8e5 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,8 @@ go 1.21
require (
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
+ github.com/Masterminds/sprig/v3 v3.2.3
+ github.com/charmbracelet/glamour v0.6.0
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
github.com/fsnotify/fsnotify v1.7.0
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e
@@ -11,6 +13,8 @@ require (
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
github.com/gofiber/fiber/v2 v2.50.0
+ github.com/gofiber/template/html/v2 v2.1.1
+ github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47
github.com/google/uuid v1.3.1
github.com/hashicorp/go-multierror v1.1.1
github.com/hpcloud/tail v1.0.0
@@ -21,6 +25,7 @@ require (
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530
github.com/onsi/ginkgo/v2 v2.13.0
github.com/onsi/gomega v1.28.1
+ github.com/ory/dockertest/v3 v3.10.0
github.com/otiai10/openaigo v1.6.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
@@ -56,7 +61,6 @@ require (
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
- github.com/Masterminds/sprig/v3 v3.2.3 // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
@@ -65,7 +69,6 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
- github.com/charmbracelet/glamour v0.6.0 // indirect
github.com/containerd/continuity v0.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
@@ -76,7 +79,6 @@ require (
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/gofiber/template v1.8.3 // indirect
- github.com/gofiber/template/html/v2 v2.1.1 // indirect
github.com/gofiber/utils v1.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
@@ -100,7 +102,6 @@ require (
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.2 // indirect
github.com/opencontainers/runc v1.1.5 // indirect
- github.com/ory/dockertest/v3 v3.10.0 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
@@ -108,6 +109,7 @@ require (
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
+ github.com/russross/blackfriday v1.6.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/cast v1.3.1 // indirect
diff --git a/go.sum b/go.sum
index a2c5b912..bec43989 100644
--- a/go.sum
+++ b/go.sum
@@ -1,8 +1,6 @@
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
-github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
-github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
@@ -43,6 +41,7 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -59,19 +58,14 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
-github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
@@ -85,10 +79,10 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
+github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
+github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
@@ -117,6 +111,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k=
+github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -164,6 +160,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394=
+github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
@@ -184,8 +182,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM=
-github.com/microcosm-cc/bluemonday v1.0.24 h1:NGQoPtwGVcbGkKfvyYk1yRqknzBuoMiUrO6R7uFTPlw=
-github.com/microcosm-cc/bluemonday v1.0.24/go.mod h1:ArQySAMps0790cHSkdPEJ7bGkF2VePWH773hsJNSHf8=
github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58=
github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
@@ -200,18 +196,12 @@ github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdx
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
-github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
-github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0=
-github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks=
-github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0=
github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
@@ -273,11 +263,11 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
+github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw=
-github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
@@ -397,6 +387,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -430,8 +422,6 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
-golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -492,3 +482,5 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
+gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
+gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
From b9c5e14e2c9a9ac9a9f3292db8914dbdbbe8f06e Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 29 Mar 2024 00:13:38 +0100
Subject: [PATCH 0063/2750] :arrow_up: Update ggerganov/llama.cpp (#1923)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 3258bbe3..4323e4eb 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a016026a3ac16d8c9b993a3573f19b9556d67de4
+CPPLLAMA_VERSION?=5106ef482c65ac60ac14da9a68c7b37bca4c6993
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From ab2f403dd0716e1c167389a3e69486891c5444b8 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 29 Mar 2024 00:13:59 +0100
Subject: [PATCH 0064/2750] :arrow_up: Update ggerganov/whisper.cpp (#1924)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 4323e4eb..dcaa1227 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=2948c740a2bf43190b8e3badb6f1e147f11f96d1
+WHISPER_CPP_VERSION?=fc366b807a17dc05813a6fcc13c8c4dfd442fa6a
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 123a5a2e160ad0053f26b8a75d8b3f3bbd0c2c2d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 29 Mar 2024 22:29:33 +0100
Subject: [PATCH 0065/2750] feat(swagger): Add swagger API doc (#1926)
* makefile(build): add minimal and api build target
* feat(swagger): Add swagger
---
Makefile | 10 +
core/http/api.go | 17 +
core/http/endpoints/elevenlabs/tts.go | 6 +
core/http/endpoints/localai/tts.go | 5 +
core/http/endpoints/openai/assistant.go | 16 +-
core/http/endpoints/openai/chat.go | 5 +
core/http/endpoints/openai/completion.go | 6 +-
core/http/endpoints/openai/embeddings.go | 6 +-
core/http/endpoints/openai/image.go | 7 +-
core/http/endpoints/openai/transcription.go | 8 +-
core/http/views/partials/navbar.html | 1 +
go.mod | 45 +-
go.sum | 59 ++
main.go | 2 +
swagger/docs.go | 801 ++++++++++++++++++++
swagger/swagger.json | 776 +++++++++++++++++++
swagger/swagger.yaml | 519 +++++++++++++
17 files changed, 2264 insertions(+), 25 deletions(-)
create mode 100644 swagger/docs.go
create mode 100644 swagger/swagger.json
create mode 100644 swagger/swagger.yaml
diff --git a/Makefile b/Makefile
index dcaa1227..440f5158 100644
--- a/Makefile
+++ b/Makefile
@@ -307,6 +307,12 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
+build-minimal:
+ BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
+
+build-api:
+ BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
+
dist: build
mkdir -p release
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
@@ -584,3 +590,7 @@ docker-image-intel-xpu:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
+
+.PHONY: swagger
+swagger:
+ swag init -g core/http/api.go --output swagger
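The `swagger` target assumes the `swag` CLI is already on the `PATH`; the Makefile does not install it. A typical way to install it (an assumed step, not part of this patch):

```bash
# install the swaggo CLI used by `make swagger`
go install github.com/swaggo/swag/cmd/swag@latest
```

With the server running, the `swagger.HandlerDefault` route registered below typically serves the generated UI at `/swagger/index.html`.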
diff --git a/core/http/api.go b/core/http/api.go
index 24216737..ff413b0a 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -7,6 +7,7 @@ import (
"strings"
"github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/gofiber/swagger" // swagger handler
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
@@ -42,6 +43,20 @@ func readAuthHeader(c *fiber.Ctx) string {
return authHeader
}
+// @title LocalAI API
+// @version 2.0.0
+// @description The LocalAI Rest API.
+// @termsOfService
+// @contact.name LocalAI
+// @contact.url https://localai.io
+// @license.name MIT
+// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
+// @host localhost:8080
+// @BasePath /
+// @securityDefinitions.apikey BearerAuth
+// @in header
+// @name Authorization
+
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
@@ -170,6 +185,8 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+ app.Get("/swagger/*", swagger.HandlerDefault) // default
+
welcomeRoute(
app,
cl,
diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go
index b70c8de4..841f9b5f 100644
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -11,6 +11,12 @@ import (
"github.com/rs/zerolog/log"
)
+// TTSEndpoint is the ElevenLabs-compatible text-to-speech API endpoint
+// @Summary Generates audio from the input text.
+// @Param voice-id path string true "Voice ID"
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary "Response"
+// @Router /v1/text-to-speech/{voice-id} [post]
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go
index 508a29ab..7822e024 100644
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -11,6 +11,11 @@ import (
"github.com/rs/zerolog/log"
)
+// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
+// @Summary Generates audio from the input text.
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary "Response"
+// @Router /v1/audio/speech [post]
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index 0e0d8a99..dceb3789 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -2,17 +2,18 @@ package openai
import (
"fmt"
- "github.com/go-skynet/LocalAI/core/config"
- model "github.com/go-skynet/LocalAI/pkg/model"
- "github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/gofiber/fiber/v2"
- "github.com/rs/zerolog/log"
"net/http"
"sort"
"strconv"
"strings"
"sync/atomic"
"time"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/gofiber/fiber/v2"
+ "github.com/rs/zerolog/log"
)
// ToolType defines a type for tool options
@@ -65,6 +66,11 @@ type AssistantRequest struct {
Metadata map[string]string `json:"metadata,omitempty"`
}
+// CreateAssistantEndpoint is the OpenAI Assistant API endpoint https://platform.openai.com/docs/api-reference/assistants/createAssistant
+// @Summary Create an assistant with a model and instructions.
+// @Param request body AssistantRequest true "query params"
+// @Success 200 {object} Assistant "Response"
+// @Router /v1/assistants [post]
func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
request := new(AssistantRequest)
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 383a2b77..c2e22962 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -20,6 +20,11 @@ import (
"github.com/valyala/fasthttp"
)
+// ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
+// @Summary Generate a chat completions for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
emptyMessage := ""
id := uuid.New().String()
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index 9344f9fe..a67f0993 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -20,7 +20,11 @@ import (
"github.com/valyala/fasthttp"
)
-// https://platform.openai.com/docs/api-reference/completions
+// CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
+// @Summary Generate completions for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/completions [post]
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index 774b0a5e..eca34f79 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -16,7 +16,11 @@ import (
"github.com/rs/zerolog/log"
)
-// https://platform.openai.com/docs/api-reference/embeddings
+// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
+// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/embeddings [post]
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readRequest(c, ml, appConfig, true)
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index d59b1051..9e806b3e 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -44,7 +44,7 @@ func downloadFile(url string) (string, error) {
return out.Name(), err
}
-// https://platform.openai.com/docs/api-reference/images/create
+//
/*
*
@@ -59,6 +59,11 @@ func downloadFile(url string) (string, error) {
*
*/
+// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
+// @Summary Creates an image given a prompt.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/images/generations [post]
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, ml, appConfig, false)
diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index 403f8b02..c7dd39e7 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -16,7 +16,13 @@ import (
"github.com/rs/zerolog/log"
)
-// https://platform.openai.com/docs/api-reference/audio/create
+// TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create
+// @Summary Transcribes audio into the input language.
+// @accept multipart/form-data
+// @Param model formData string true "model"
+// @Param file formData file true "file"
+// @Success 200 {object} map[string]string "Response"
+// @Router /v1/audio/transcriptions [post]
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, ml, appConfig, false)
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index 2717f974..c3d3223f 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -9,6 +9,7 @@
diff --git a/go.mod b/go.mod
index 0ac0d8e5..4dd207c7 100644
--- a/go.mod
+++ b/go.mod
@@ -12,10 +12,10 @@ require (
github.com/go-audio/wav v1.1.0
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
- github.com/gofiber/fiber/v2 v2.50.0
+ github.com/gofiber/fiber/v2 v2.52.0
github.com/gofiber/template/html/v2 v2.1.1
github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47
- github.com/google/uuid v1.3.1
+ github.com/google/uuid v1.5.0
github.com/hashicorp/go-multierror v1.1.1
github.com/hpcloud/tail v1.0.0
github.com/imdario/mergo v0.3.16
@@ -32,10 +32,10 @@ require (
github.com/rs/zerolog v1.31.0
github.com/sashabaranov/go-openai v1.20.4
github.com/schollz/progressbar/v3 v3.13.1
- github.com/stretchr/testify v1.8.4
+ github.com/stretchr/testify v1.9.0
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
- github.com/urfave/cli/v2 v2.25.7
- github.com/valyala/fasthttp v1.50.0
+ github.com/urfave/cli/v2 v2.27.1
+ github.com/valyala/fasthttp v1.51.0
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/prometheus v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
@@ -59,10 +59,13 @@ require (
require (
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
+ github.com/KyleBanks/depth v1.2.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
+ github.com/PuerkitoBio/purell v1.2.1 // indirect
+ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
@@ -78,6 +81,11 @@ require (
github.com/docker/go-units v0.4.0 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/go-openapi/jsonpointer v0.21.0 // indirect
+ github.com/go-openapi/jsonreference v0.21.0 // indirect
+ github.com/go-openapi/spec v0.21.0 // indirect
+ github.com/go-openapi/swag v0.23.0 // indirect
+ github.com/gofiber/swagger v1.0.0 // indirect
github.com/gofiber/template v1.8.3 // indirect
github.com/gofiber/utils v1.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
@@ -86,8 +94,10 @@ require (
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
+ github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/microcosm-cc/bluemonday v1.0.26 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
@@ -113,6 +123,8 @@ require (
github.com/shopspring/decimal v1.2.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/cast v1.3.1 // indirect
+ github.com/swaggo/files/v2 v2.0.0 // indirect
+ github.com/swaggo/swag v1.16.3 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
@@ -122,17 +134,18 @@ require (
github.com/yuin/goldmark-emoji v1.0.1 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
- golang.org/x/crypto v0.14.0 // indirect
- golang.org/x/mod v0.12.0 // indirect
- golang.org/x/term v0.13.0 // indirect
+ golang.org/x/crypto v0.21.0 // indirect
+ golang.org/x/mod v0.16.0 // indirect
+ golang.org/x/term v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
+ sigs.k8s.io/yaml v1.4.0 // indirect
)
require (
github.com/andybalholm/brotli v1.0.5 // indirect
- github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+ github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/go-audio/audio v1.0.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
@@ -140,18 +153,18 @@ require (
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
- github.com/klauspost/compress v1.16.7 // indirect
+ github.com/klauspost/compress v1.17.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
- github.com/mattn/go-isatty v0.0.19 // indirect
+ github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
- github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
- golang.org/x/net v0.17.0 // indirect
- golang.org/x/sys v0.17.0 // indirect
- golang.org/x/text v0.13.0 // indirect
- golang.org/x/tools v0.12.0 // indirect
+ github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
+ golang.org/x/net v0.22.0 // indirect
+ golang.org/x/sys v0.18.0 // indirect
+ golang.org/x/text v0.14.0 // indirect
+ golang.org/x/tools v0.19.0 // indirect
)
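
[Note] The new direct and indirect requirements above (swaggo/swag, gofiber/swagger, the go-openapi packages) form the swagger toolchain. Purely hypothetical wiring, not part of this patch: a go:generate directive so that `go generate ./...` would rebuild the swagger/ artifacts; the --output value matches the swagger/ directory this patch adds, and swag's default entrypoint is the repository's main.go.

// generate.go (hypothetical helper file at the repo root)
//go:generate swag init --output swagger
package main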
diff --git a/go.sum b/go.sum
index bec43989..f81f10c8 100644
--- a/go.sum
+++ b/go.sum
@@ -1,6 +1,8 @@
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
@@ -11,6 +13,10 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
+github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28=
+github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
@@ -41,6 +47,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
@@ -79,6 +87,14 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
+github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
+github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
+github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
@@ -90,6 +106,10 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
+github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE=
+github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
+github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
+github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg=
github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8=
github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8=
@@ -129,6 +149,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
+github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
@@ -143,6 +165,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
@@ -150,6 +174,8 @@ github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
+github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
@@ -166,12 +192,16 @@ github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
@@ -300,6 +330,12 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
+github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
+github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
+github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
@@ -315,10 +351,14 @@ github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
+github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M=
github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
+github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
+github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
@@ -333,6 +373,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw=
+github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
@@ -361,11 +403,15 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
+golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@@ -381,6 +427,8 @@ golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfS
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
+golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -389,6 +437,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -424,12 +473,16 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
+golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -437,6 +490,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
@@ -446,6 +501,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
+golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
+golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -484,3 +541,5 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/main.go b/main.go
index f000aa71..0d8befcb 100644
--- a/main.go
+++ b/main.go
@@ -25,6 +25,8 @@ import (
"github.com/rs/zerolog/log"
progressbar "github.com/schollz/progressbar/v3"
"github.com/urfave/cli/v2"
+
+ _ "github.com/go-skynet/LocalAI/swagger"
)
const (
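
[Note] The underscore import above is Go's standard side-effect import: pulling in the swagger package runs its init(), which registers the generated spec (see swagger/docs.go below). A single-file sketch of the mechanism, with a stand-in registry since a real blank import needs two packages:

package main

import "fmt"

// registered stands in for swag's internal spec registry.
var registered = map[string]string{}

// register mimics swag.Register(name, spec).
func register(name, spec string) { registered[name] = spec }

// init runs as soon as the enclosing package is imported, even via a
// blank (_) import; this mirrors the init() at the end of swagger/docs.go.
func init() {
	register("swagger", `{"swagger":"2.0"}`)
}

func main() {
	fmt.Println("specs registered at startup:", registered)
}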
diff --git a/swagger/docs.go b/swagger/docs.go
new file mode 100644
index 00000000..a922fa2e
--- /dev/null
+++ b/swagger/docs.go
@@ -0,0 +1,801 @@
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
+
+import "github.com/swaggo/swag"
+
+const docTemplate = `{
+ "schemes": {{ marshal .Schemes }},
+ "swagger": "2.0",
+ "info": {
+ "description": "{{escape .Description}}",
+ "title": "{{.Title}}",
+ "contact": {
+ "name": "OpenAI Support",
+ "url": "https://help.openai.com/"
+ },
+ "license": {
+ "name": "MIT",
+ "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE"
+ },
+ "version": "{{.Version}}"
+ },
+ "host": "{{.Host}}",
+ "basePath": "{{.BasePath}}",
+ "paths": {
+ "/v1/assistants": {
+ "post": {
+ "summary": "Create an assistant with a model and instructions.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/openai.AssistantRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/openai.Assistant"
+ }
+ }
+ }
+ }
+ },
+ "/v1/audio/speech": {
+ "post": {
+ "summary": "Generates audio from the input text.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.TTSRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+ "/v1/audio/transcriptions": {
+ "post": {
+ "consumes": [
+ "multipart/form-data"
+ ],
+ "summary": "Transcribes audio into the input language.",
+ "parameters": [
+ {
+ "type": "string",
+ "description": "model",
+ "name": "model",
+ "in": "formData",
+ "required": true
+ },
+ {
+ "type": "file",
+ "description": "file",
+ "name": "file",
+ "in": "formData",
+ "required": true
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "/v1/chat/completions": {
+ "post": {
+                "summary": "Generate a chat completion for a given prompt and model.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/completions": {
+ "post": {
+ "summary": "Generate completions for a given prompt and model.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/embeddings": {
+ "post": {
+ "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/images/generations": {
+ "post": {
+ "summary": "Creates an image given a prompt.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/text-to-speech/{voice-id}": {
+ "post": {
+ "summary": "Generates audio from the input text.",
+ "parameters": [
+ {
+ "type": "string",
+                        "description": "Voice ID",
+ "name": "voice-id",
+ "in": "path",
+ "required": true
+ },
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.TTSRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "definitions": {
+ "grammar.Argument": {
+ "type": "object",
+ "properties": {
+ "properties": {
+ "type": "object",
+ "additionalProperties": true
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.Function": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "parameters": {
+ "type": "object",
+ "additionalProperties": true
+ }
+ }
+ },
+ "grammar.FunctionName": {
+ "type": "object",
+ "properties": {
+ "const": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.Item": {
+ "type": "object",
+ "properties": {
+ "properties": {
+ "$ref": "#/definitions/grammar.Properties"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.JSONFunctionStructure": {
+ "type": "object",
+ "properties": {
+ "$defs": {
+ "type": "object",
+ "additionalProperties": true
+ },
+ "anyOf": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Item"
+ }
+ },
+ "oneOf": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Item"
+ }
+ }
+ }
+ },
+ "grammar.Properties": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "$ref": "#/definitions/grammar.Argument"
+ },
+ "function": {
+ "$ref": "#/definitions/grammar.FunctionName"
+ }
+ }
+ },
+ "grammar.Tool": {
+ "type": "object",
+ "properties": {
+ "function": {
+ "$ref": "#/definitions/grammar.Function"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "openai.Assistant": {
+ "type": "object",
+ "properties": {
+ "created": {
+ "description": "The time at which the assistant was created.",
+ "type": "integer"
+ },
+ "description": {
+ "description": "The description of the assistant.",
+ "type": "string"
+ },
+ "file_ids": {
+ "description": "A list of file IDs attached to this assistant.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "id": {
+ "description": "The unique identifier of the assistant.",
+ "type": "string"
+ },
+ "instructions": {
+ "description": "The system instructions that the assistant uses.",
+ "type": "string"
+ },
+ "metadata": {
+ "description": "Set of key-value pairs attached to the assistant.",
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "model": {
+ "description": "The model ID used by the assistant.",
+ "type": "string"
+ },
+ "name": {
+ "description": "The name of the assistant.",
+ "type": "string"
+ },
+ "object": {
+ "description": "Object type, which is \"assistant\".",
+ "type": "string"
+ },
+ "tools": {
+ "description": "A list of tools enabled on the assistant.",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/openai.Tool"
+ }
+ }
+ }
+ },
+ "openai.AssistantRequest": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string"
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "model": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/openai.Tool"
+ }
+ }
+ }
+ },
+ "openai.Tool": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "$ref": "#/definitions/openai.ToolType"
+ }
+ }
+ },
+ "openai.ToolType": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "retrieval",
+ "function"
+ ],
+ "x-enum-varnames": [
+ "CodeInterpreter",
+ "Retrieval",
+ "Function"
+ ]
+ },
+ "schema.ChatCompletionResponseFormat": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.Choice": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "$ref": "#/definitions/schema.Message"
+ },
+ "finish_reason": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ },
+ "message": {
+ "$ref": "#/definitions/schema.Message"
+ },
+ "text": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.FunctionCall": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.Item": {
+ "type": "object",
+ "properties": {
+ "b64_json": {
+ "type": "string"
+ },
+ "embedding": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ }
+ },
+ "index": {
+ "type": "integer"
+ },
+ "object": {
+ "type": "string"
+ },
+ "url": {
+ "description": "Images",
+ "type": "string"
+ }
+ }
+ },
+ "schema.Message": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "description": "The message content"
+ },
+ "function_call": {
+ "description": "A result of a function call"
+ },
+ "name": {
+ "description": "The message name (used for tools calls)",
+ "type": "string"
+ },
+ "role": {
+ "description": "The message role",
+ "type": "string"
+ },
+ "string_content": {
+ "type": "string"
+ },
+ "string_images": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "tool_calls": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.ToolCall"
+ }
+ }
+ }
+ },
+ "schema.OpenAIRequest": {
+ "type": "object",
+ "required": [
+ "file"
+ ],
+ "properties": {
+ "backend": {
+ "type": "string"
+ },
+ "batch": {
+ "description": "Custom parameters - not present in the OpenAI API",
+ "type": "integer"
+ },
+ "clip_skip": {
+ "description": "Diffusers",
+ "type": "integer"
+ },
+ "echo": {
+ "type": "boolean"
+ },
+ "file": {
+ "description": "whisper",
+ "type": "string"
+ },
+ "frequency_penalty": {
+ "type": "number"
+ },
+ "function_call": {
+ "description": "might be a string or an object"
+ },
+ "functions": {
+ "description": "A list of available functions to call",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Function"
+ }
+ },
+ "grammar": {
+ "description": "A grammar to constrain the LLM output",
+ "type": "string"
+ },
+ "grammar_json_functions": {
+ "$ref": "#/definitions/grammar.JSONFunctionStructure"
+ },
+ "ignore_eos": {
+ "type": "boolean"
+ },
+ "input": {},
+ "instruction": {
+ "description": "Edit endpoint",
+ "type": "string"
+ },
+ "language": {
+ "description": "Also part of the OpenAI official spec",
+ "type": "string"
+ },
+ "max_tokens": {
+ "type": "integer"
+ },
+ "messages": {
+ "description": "Messages is read only by chat/completion API calls",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Message"
+ }
+ },
+ "mode": {
+ "description": "Image (not supported by OpenAI)",
+ "type": "integer"
+ },
+ "model": {
+ "description": "Also part of the OpenAI official spec",
+ "type": "string"
+ },
+ "model_base_name": {
+ "description": "AutoGPTQ",
+ "type": "string"
+ },
+ "n": {
+ "description": "Also part of the OpenAI official spec. use it for returning multiple results",
+ "type": "integer"
+ },
+ "n_keep": {
+ "type": "integer"
+ },
+ "negative_prompt": {
+ "type": "string"
+ },
+ "negative_prompt_scale": {
+ "type": "number"
+ },
+ "presence_penalty": {
+ "type": "number"
+ },
+ "prompt": {
+ "description": "Prompt is read only by completion/image API calls"
+ },
+ "repeat_penalty": {
+ "type": "number"
+ },
+ "response_format": {
+ "description": "whisper/image",
+ "allOf": [
+ {
+ "$ref": "#/definitions/schema.ChatCompletionResponseFormat"
+ }
+ ]
+ },
+ "rope_freq_base": {
+ "type": "number"
+ },
+ "rope_freq_scale": {
+ "type": "number"
+ },
+ "seed": {
+ "type": "integer"
+ },
+ "size": {
+ "description": "image",
+ "type": "string"
+ },
+ "step": {
+ "type": "integer"
+ },
+ "stop": {},
+ "stream": {
+ "type": "boolean"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "tfz": {
+ "type": "number"
+ },
+ "tokenizer": {
+ "description": "RWKV (?)",
+ "type": "string"
+ },
+ "tool_choice": {},
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Tool"
+ }
+ },
+ "top_k": {
+ "type": "integer"
+ },
+ "top_p": {
+ "description": "Common options between all the API calls, part of the OpenAI spec",
+ "type": "number"
+ },
+ "typical_p": {
+ "type": "number"
+ },
+ "use_fast_tokenizer": {
+ "description": "AutoGPTQ",
+ "type": "boolean"
+ }
+ }
+ },
+ "schema.OpenAIResponse": {
+ "type": "object",
+ "properties": {
+ "choices": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Choice"
+ }
+ },
+ "created": {
+ "type": "integer"
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Item"
+ }
+ },
+ "id": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string"
+ },
+ "usage": {
+ "$ref": "#/definitions/schema.OpenAIUsage"
+ }
+ }
+ },
+ "schema.OpenAIUsage": {
+ "type": "object",
+ "properties": {
+ "completion_tokens": {
+ "type": "integer"
+ },
+ "prompt_tokens": {
+ "type": "integer"
+ },
+ "total_tokens": {
+ "type": "integer"
+ }
+ }
+ },
+ "schema.TTSRequest": {
+ "type": "object",
+ "properties": {
+ "backend": {
+ "type": "string"
+ },
+ "input": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "voice": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.ToolCall": {
+ "type": "object",
+ "properties": {
+ "function": {
+ "$ref": "#/definitions/schema.FunctionCall"
+ },
+ "id": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "securityDefinitions": {
+ "BearerAuth": {
+ "type": "apiKey",
+ "name": "Authorization",
+ "in": "header"
+ }
+ }
+}`
+
+// SwaggerInfo holds exported Swagger Info so clients can modify it
+var SwaggerInfo = &swag.Spec{
+ Version: "2.0.0",
+ Host: "localhost:8080",
+ BasePath: "/",
+ Schemes: []string{},
+ Title: "LocalAI API",
+ Description: "The OpenAI REST API.",
+ InfoInstanceName: "swagger",
+ SwaggerTemplate: docTemplate,
+ LeftDelim: "{{",
+ RightDelim: "}}",
+}
+
+func init() {
+ swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo)
+}
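
[Note] Registration alone does not expose the spec over HTTP. The gofiber/swagger dependency pulled into go.mod by this patch provides a ready-made handler; a minimal sketch, assuming the conventional /swagger/* route (the actual route wiring lives elsewhere in the tree):

package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/swagger"

	_ "github.com/go-skynet/LocalAI/swagger" // side effect: registers the spec
)

func main() {
	app := fiber.New()
	// HandlerDefault serves the Swagger UI and the registered JSON spec.
	app.Get("/swagger/*", swagger.HandlerDefault)
	log.Fatal(app.Listen(":8080"))
}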
diff --git a/swagger/swagger.json b/swagger/swagger.json
new file mode 100644
index 00000000..c9088d93
--- /dev/null
+++ b/swagger/swagger.json
@@ -0,0 +1,776 @@
+{
+ "swagger": "2.0",
+ "info": {
+ "description": "The OpenAI REST API.",
+ "title": "LocalAI API",
+ "contact": {
+ "name": "OpenAI Support",
+ "url": "https://help.openai.com/"
+ },
+ "license": {
+ "name": "MIT",
+ "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE"
+ },
+ "version": "2.0.0"
+ },
+ "host": "localhost:8080",
+ "basePath": "/",
+ "paths": {
+ "/v1/assistants": {
+ "post": {
+ "summary": "Create an assistant with a model and instructions.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/openai.AssistantRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/openai.Assistant"
+ }
+ }
+ }
+ }
+ },
+ "/v1/audio/speech": {
+ "post": {
+ "summary": "Generates audio from the input text.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.TTSRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+ "/v1/audio/transcriptions": {
+ "post": {
+ "consumes": [
+ "multipart/form-data"
+ ],
+ "summary": "Transcribes audio into the input language.",
+ "parameters": [
+ {
+ "type": "string",
+ "description": "model",
+ "name": "model",
+ "in": "formData",
+ "required": true
+ },
+ {
+ "type": "file",
+ "description": "file",
+ "name": "file",
+ "in": "formData",
+ "required": true
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "/v1/chat/completions": {
+ "post": {
+                "summary": "Generate a chat completion for a given prompt and model.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/completions": {
+ "post": {
+ "summary": "Generate completions for a given prompt and model.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/embeddings": {
+ "post": {
+ "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/images/generations": {
+ "post": {
+ "summary": "Creates an image given a prompt.",
+ "parameters": [
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "$ref": "#/definitions/schema.OpenAIResponse"
+ }
+ }
+ }
+ }
+ },
+ "/v1/text-to-speech/{voice-id}": {
+ "post": {
+ "summary": "Generates audio from the input text.",
+ "parameters": [
+ {
+ "type": "string",
+                        "description": "Voice ID",
+ "name": "voice-id",
+ "in": "path",
+ "required": true
+ },
+ {
+ "description": "query params",
+ "name": "request",
+ "in": "body",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/schema.TTSRequest"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Response",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "definitions": {
+ "grammar.Argument": {
+ "type": "object",
+ "properties": {
+ "properties": {
+ "type": "object",
+ "additionalProperties": true
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.Function": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "parameters": {
+ "type": "object",
+ "additionalProperties": true
+ }
+ }
+ },
+ "grammar.FunctionName": {
+ "type": "object",
+ "properties": {
+ "const": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.Item": {
+ "type": "object",
+ "properties": {
+ "properties": {
+ "$ref": "#/definitions/grammar.Properties"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "grammar.JSONFunctionStructure": {
+ "type": "object",
+ "properties": {
+ "$defs": {
+ "type": "object",
+ "additionalProperties": true
+ },
+ "anyOf": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Item"
+ }
+ },
+ "oneOf": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Item"
+ }
+ }
+ }
+ },
+ "grammar.Properties": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "$ref": "#/definitions/grammar.Argument"
+ },
+ "function": {
+ "$ref": "#/definitions/grammar.FunctionName"
+ }
+ }
+ },
+ "grammar.Tool": {
+ "type": "object",
+ "properties": {
+ "function": {
+ "$ref": "#/definitions/grammar.Function"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "openai.Assistant": {
+ "type": "object",
+ "properties": {
+ "created": {
+ "description": "The time at which the assistant was created.",
+ "type": "integer"
+ },
+ "description": {
+ "description": "The description of the assistant.",
+ "type": "string"
+ },
+ "file_ids": {
+ "description": "A list of file IDs attached to this assistant.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "id": {
+ "description": "The unique identifier of the assistant.",
+ "type": "string"
+ },
+ "instructions": {
+ "description": "The system instructions that the assistant uses.",
+ "type": "string"
+ },
+ "metadata": {
+ "description": "Set of key-value pairs attached to the assistant.",
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "model": {
+ "description": "The model ID used by the assistant.",
+ "type": "string"
+ },
+ "name": {
+ "description": "The name of the assistant.",
+ "type": "string"
+ },
+ "object": {
+ "description": "Object type, which is \"assistant\".",
+ "type": "string"
+ },
+ "tools": {
+ "description": "A list of tools enabled on the assistant.",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/openai.Tool"
+ }
+ }
+ }
+ },
+ "openai.AssistantRequest": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string"
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "model": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/openai.Tool"
+ }
+ }
+ }
+ },
+ "openai.Tool": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "$ref": "#/definitions/openai.ToolType"
+ }
+ }
+ },
+ "openai.ToolType": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "retrieval",
+ "function"
+ ],
+ "x-enum-varnames": [
+ "CodeInterpreter",
+ "Retrieval",
+ "Function"
+ ]
+ },
+ "schema.ChatCompletionResponseFormat": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.Choice": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "$ref": "#/definitions/schema.Message"
+ },
+ "finish_reason": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ },
+ "message": {
+ "$ref": "#/definitions/schema.Message"
+ },
+ "text": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.FunctionCall": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.Item": {
+ "type": "object",
+ "properties": {
+ "b64_json": {
+ "type": "string"
+ },
+ "embedding": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ }
+ },
+ "index": {
+ "type": "integer"
+ },
+ "object": {
+ "type": "string"
+ },
+ "url": {
+ "description": "Images",
+ "type": "string"
+ }
+ }
+ },
+ "schema.Message": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "description": "The message content"
+ },
+ "function_call": {
+ "description": "A result of a function call"
+ },
+ "name": {
+ "description": "The message name (used for tools calls)",
+ "type": "string"
+ },
+ "role": {
+ "description": "The message role",
+ "type": "string"
+ },
+ "string_content": {
+ "type": "string"
+ },
+ "string_images": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "tool_calls": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.ToolCall"
+ }
+ }
+ }
+ },
+ "schema.OpenAIRequest": {
+ "type": "object",
+ "required": [
+ "file"
+ ],
+ "properties": {
+ "backend": {
+ "type": "string"
+ },
+ "batch": {
+ "description": "Custom parameters - not present in the OpenAI API",
+ "type": "integer"
+ },
+ "clip_skip": {
+ "description": "Diffusers",
+ "type": "integer"
+ },
+ "echo": {
+ "type": "boolean"
+ },
+ "file": {
+ "description": "whisper",
+ "type": "string"
+ },
+ "frequency_penalty": {
+ "type": "number"
+ },
+ "function_call": {
+ "description": "might be a string or an object"
+ },
+ "functions": {
+ "description": "A list of available functions to call",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Function"
+ }
+ },
+ "grammar": {
+ "description": "A grammar to constrain the LLM output",
+ "type": "string"
+ },
+ "grammar_json_functions": {
+ "$ref": "#/definitions/grammar.JSONFunctionStructure"
+ },
+ "ignore_eos": {
+ "type": "boolean"
+ },
+ "input": {},
+ "instruction": {
+ "description": "Edit endpoint",
+ "type": "string"
+ },
+ "language": {
+ "description": "Also part of the OpenAI official spec",
+ "type": "string"
+ },
+ "max_tokens": {
+ "type": "integer"
+ },
+ "messages": {
+ "description": "Messages is read only by chat/completion API calls",
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Message"
+ }
+ },
+ "mode": {
+ "description": "Image (not supported by OpenAI)",
+ "type": "integer"
+ },
+ "model": {
+ "description": "Also part of the OpenAI official spec",
+ "type": "string"
+ },
+ "model_base_name": {
+ "description": "AutoGPTQ",
+ "type": "string"
+ },
+ "n": {
+ "description": "Also part of the OpenAI official spec. use it for returning multiple results",
+ "type": "integer"
+ },
+ "n_keep": {
+ "type": "integer"
+ },
+ "negative_prompt": {
+ "type": "string"
+ },
+ "negative_prompt_scale": {
+ "type": "number"
+ },
+ "presence_penalty": {
+ "type": "number"
+ },
+ "prompt": {
+ "description": "Prompt is read only by completion/image API calls"
+ },
+ "repeat_penalty": {
+ "type": "number"
+ },
+ "response_format": {
+ "description": "whisper/image",
+ "allOf": [
+ {
+ "$ref": "#/definitions/schema.ChatCompletionResponseFormat"
+ }
+ ]
+ },
+ "rope_freq_base": {
+ "type": "number"
+ },
+ "rope_freq_scale": {
+ "type": "number"
+ },
+ "seed": {
+ "type": "integer"
+ },
+ "size": {
+ "description": "image",
+ "type": "string"
+ },
+ "step": {
+ "type": "integer"
+ },
+ "stop": {},
+ "stream": {
+ "type": "boolean"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "tfz": {
+ "type": "number"
+ },
+ "tokenizer": {
+ "description": "RWKV (?)",
+ "type": "string"
+ },
+ "tool_choice": {},
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/grammar.Tool"
+ }
+ },
+ "top_k": {
+ "type": "integer"
+ },
+ "top_p": {
+ "description": "Common options between all the API calls, part of the OpenAI spec",
+ "type": "number"
+ },
+ "typical_p": {
+ "type": "number"
+ },
+ "use_fast_tokenizer": {
+ "description": "AutoGPTQ",
+ "type": "boolean"
+ }
+ }
+ },
+ "schema.OpenAIResponse": {
+ "type": "object",
+ "properties": {
+ "choices": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Choice"
+ }
+ },
+ "created": {
+ "type": "integer"
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/schema.Item"
+ }
+ },
+ "id": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string"
+ },
+ "usage": {
+ "$ref": "#/definitions/schema.OpenAIUsage"
+ }
+ }
+ },
+ "schema.OpenAIUsage": {
+ "type": "object",
+ "properties": {
+ "completion_tokens": {
+ "type": "integer"
+ },
+ "prompt_tokens": {
+ "type": "integer"
+ },
+ "total_tokens": {
+ "type": "integer"
+ }
+ }
+ },
+ "schema.TTSRequest": {
+ "type": "object",
+ "properties": {
+ "backend": {
+ "type": "string"
+ },
+ "input": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "voice": {
+ "type": "string"
+ }
+ }
+ },
+ "schema.ToolCall": {
+ "type": "object",
+ "properties": {
+ "function": {
+ "$ref": "#/definitions/schema.FunctionCall"
+ },
+ "id": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "securityDefinitions": {
+ "BearerAuth": {
+ "type": "apiKey",
+ "name": "Authorization",
+ "in": "header"
+ }
+ }
+}
\ No newline at end of file
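
[Note] swagger.json and swagger.yaml (next) are static snapshots of the same template rendered in docs.go. Because docs.go exports SwaggerInfo, a deployment can adjust the spec it serves at startup instead of regenerating these files; a sketch, where the environment variable name is a made-up example:

package main

import (
	"fmt"
	"os"

	localai "github.com/go-skynet/LocalAI/swagger"
)

func main() {
	if host := os.Getenv("LOCALAI_SWAGGER_HOST"); host != "" {
		localai.SwaggerInfo.Host = host // overrides the generated "localhost:8080"
	}
	fmt.Println("spec host:", localai.SwaggerInfo.Host)
}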
diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
new file mode 100644
index 00000000..b33e1fab
--- /dev/null
+++ b/swagger/swagger.yaml
@@ -0,0 +1,519 @@
+basePath: /
+definitions:
+ grammar.Argument:
+ properties:
+ properties:
+ additionalProperties: true
+ type: object
+ type:
+ type: string
+ type: object
+ grammar.Function:
+ properties:
+ description:
+ type: string
+ name:
+ type: string
+ parameters:
+ additionalProperties: true
+ type: object
+ type: object
+ grammar.FunctionName:
+ properties:
+ const:
+ type: string
+ type: object
+ grammar.Item:
+ properties:
+ properties:
+ $ref: '#/definitions/grammar.Properties'
+ type:
+ type: string
+ type: object
+ grammar.JSONFunctionStructure:
+ properties:
+ $defs:
+ additionalProperties: true
+ type: object
+ anyOf:
+ items:
+ $ref: '#/definitions/grammar.Item'
+ type: array
+ oneOf:
+ items:
+ $ref: '#/definitions/grammar.Item'
+ type: array
+ type: object
+ grammar.Properties:
+ properties:
+ arguments:
+ $ref: '#/definitions/grammar.Argument'
+ function:
+ $ref: '#/definitions/grammar.FunctionName'
+ type: object
+ grammar.Tool:
+ properties:
+ function:
+ $ref: '#/definitions/grammar.Function'
+ type:
+ type: string
+ type: object
+ openai.Assistant:
+ properties:
+ created:
+ description: The time at which the assistant was created.
+ type: integer
+ description:
+ description: The description of the assistant.
+ type: string
+ file_ids:
+ description: A list of file IDs attached to this assistant.
+ items:
+ type: string
+ type: array
+ id:
+ description: The unique identifier of the assistant.
+ type: string
+ instructions:
+ description: The system instructions that the assistant uses.
+ type: string
+ metadata:
+ additionalProperties:
+ type: string
+ description: Set of key-value pairs attached to the assistant.
+ type: object
+ model:
+ description: The model ID used by the assistant.
+ type: string
+ name:
+ description: The name of the assistant.
+ type: string
+ object:
+ description: Object type, which is "assistant".
+ type: string
+ tools:
+ description: A list of tools enabled on the assistant.
+ items:
+ $ref: '#/definitions/openai.Tool'
+ type: array
+ type: object
+ openai.AssistantRequest:
+ properties:
+ description:
+ type: string
+ file_ids:
+ items:
+ type: string
+ type: array
+ instructions:
+ type: string
+ metadata:
+ additionalProperties:
+ type: string
+ type: object
+ model:
+ type: string
+ name:
+ type: string
+ tools:
+ items:
+ $ref: '#/definitions/openai.Tool'
+ type: array
+ type: object
+ openai.Tool:
+ properties:
+ type:
+ $ref: '#/definitions/openai.ToolType'
+ type: object
+ openai.ToolType:
+ enum:
+ - code_interpreter
+ - retrieval
+ - function
+ type: string
+ x-enum-varnames:
+ - CodeInterpreter
+ - Retrieval
+ - Function
+ schema.ChatCompletionResponseFormat:
+ properties:
+ type:
+ type: string
+ type: object
+ schema.Choice:
+ properties:
+ delta:
+ $ref: '#/definitions/schema.Message'
+ finish_reason:
+ type: string
+ index:
+ type: integer
+ message:
+ $ref: '#/definitions/schema.Message'
+ text:
+ type: string
+ type: object
+ schema.FunctionCall:
+ properties:
+ arguments:
+ type: string
+ name:
+ type: string
+ type: object
+ schema.Item:
+ properties:
+ b64_json:
+ type: string
+ embedding:
+ items:
+ type: number
+ type: array
+ index:
+ type: integer
+ object:
+ type: string
+ url:
+ description: Images
+ type: string
+ type: object
+ schema.Message:
+ properties:
+ content:
+ description: The message content
+ function_call:
+ description: A result of a function call
+ name:
+ description: The message name (used for tools calls)
+ type: string
+ role:
+ description: The message role
+ type: string
+ string_content:
+ type: string
+ string_images:
+ items:
+ type: string
+ type: array
+ tool_calls:
+ items:
+ $ref: '#/definitions/schema.ToolCall'
+ type: array
+ type: object
+ schema.OpenAIRequest:
+ properties:
+ backend:
+ type: string
+ batch:
+ description: Custom parameters - not present in the OpenAI API
+ type: integer
+ clip_skip:
+ description: Diffusers
+ type: integer
+ echo:
+ type: boolean
+ file:
+ description: whisper
+ type: string
+ frequency_penalty:
+ type: number
+ function_call:
+ description: might be a string or an object
+ functions:
+ description: A list of available functions to call
+ items:
+ $ref: '#/definitions/grammar.Function'
+ type: array
+ grammar:
+ description: A grammar to constrain the LLM output
+ type: string
+ grammar_json_functions:
+ $ref: '#/definitions/grammar.JSONFunctionStructure'
+ ignore_eos:
+ type: boolean
+ input: {}
+ instruction:
+ description: Edit endpoint
+ type: string
+ language:
+ description: Also part of the OpenAI official spec
+ type: string
+ max_tokens:
+ type: integer
+ messages:
+ description: Messages is read only by chat/completion API calls
+ items:
+ $ref: '#/definitions/schema.Message'
+ type: array
+ mode:
+ description: Image (not supported by OpenAI)
+ type: integer
+ model:
+ description: Also part of the OpenAI official spec
+ type: string
+ model_base_name:
+ description: AutoGPTQ
+ type: string
+ "n":
+ description: Also part of the OpenAI official spec. use it for returning multiple
+ results
+ type: integer
+ n_keep:
+ type: integer
+ negative_prompt:
+ type: string
+ negative_prompt_scale:
+ type: number
+ presence_penalty:
+ type: number
+ prompt:
+ description: Prompt is read only by completion/image API calls
+ repeat_penalty:
+ type: number
+ response_format:
+ allOf:
+ - $ref: '#/definitions/schema.ChatCompletionResponseFormat'
+ description: whisper/image
+ rope_freq_base:
+ type: number
+ rope_freq_scale:
+ type: number
+ seed:
+ type: integer
+ size:
+ description: image
+ type: string
+ step:
+ type: integer
+ stop: {}
+ stream:
+ type: boolean
+ temperature:
+ type: number
+ tfz:
+ type: number
+ tokenizer:
+ description: RWKV (?)
+ type: string
+ tool_choice: {}
+ tools:
+ items:
+ $ref: '#/definitions/grammar.Tool'
+ type: array
+ top_k:
+ type: integer
+ top_p:
+ description: Common options between all the API calls, part of the OpenAI
+ spec
+ type: number
+ typical_p:
+ type: number
+ use_fast_tokenizer:
+ description: AutoGPTQ
+ type: boolean
+ required:
+ - file
+ type: object
+ schema.OpenAIResponse:
+ properties:
+ choices:
+ items:
+ $ref: '#/definitions/schema.Choice'
+ type: array
+ created:
+ type: integer
+ data:
+ items:
+ $ref: '#/definitions/schema.Item'
+ type: array
+ id:
+ type: string
+ model:
+ type: string
+ object:
+ type: string
+ usage:
+ $ref: '#/definitions/schema.OpenAIUsage'
+ type: object
+ schema.OpenAIUsage:
+ properties:
+ completion_tokens:
+ type: integer
+ prompt_tokens:
+ type: integer
+ total_tokens:
+ type: integer
+ type: object
+ schema.TTSRequest:
+ properties:
+ backend:
+ type: string
+ input:
+ type: string
+ model:
+ type: string
+ voice:
+ type: string
+ type: object
+ schema.ToolCall:
+ properties:
+ function:
+ $ref: '#/definitions/schema.FunctionCall'
+ id:
+ type: string
+ index:
+ type: integer
+ type:
+ type: string
+ type: object
+host: localhost:8080
+info:
+ contact:
+ name: OpenAI Support
+ url: https://help.openai.com/
+ description: The OpenAI REST API.
+ license:
+ name: MIT
+ url: https://github.com/openai/openai-openapi/blob/master/LICENSE
+ title: LocalAI API
+ version: 2.0.0
+paths:
+ /v1/assistants:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/openai.AssistantRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ $ref: '#/definitions/openai.Assistant'
+ summary: Create an assistant with a model and instructions.
+ /v1/audio/speech:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.TTSRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ type: string
+ summary: Generates audio from the input text.
+ /v1/audio/transcriptions:
+ post:
+ consumes:
+ - multipart/form-data
+ parameters:
+ - description: model
+ in: formData
+ name: model
+ required: true
+ type: string
+ - description: file
+ in: formData
+ name: file
+ required: true
+ type: file
+ responses:
+ "200":
+ description: Response
+ schema:
+ additionalProperties:
+ type: string
+ type: object
+ summary: Transcribes audio into the input language.
+ /v1/chat/completions:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.OpenAIRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ $ref: '#/definitions/schema.OpenAIResponse'
+ summary: Generate a chat completions for a given prompt and model.
+ /v1/completions:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.OpenAIRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ $ref: '#/definitions/schema.OpenAIResponse'
+ summary: Generate completions for a given prompt and model.
+ /v1/embeddings:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.OpenAIRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ $ref: '#/definitions/schema.OpenAIResponse'
+ summary: Get a vector representation of a given input that can be easily consumed
+ by machine learning models and algorithms.
+ /v1/images/generations:
+ post:
+ parameters:
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.OpenAIRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ $ref: '#/definitions/schema.OpenAIResponse'
+ summary: Creates an image given a prompt.
+ /v1/text-to-speech/{voice-id}:
+ post:
+ parameters:
+      - description: Voice ID
+ in: path
+ name: voice-id
+ required: true
+ type: string
+ - description: query params
+ in: body
+ name: request
+ required: true
+ schema:
+ $ref: '#/definitions/schema.TTSRequest'
+ responses:
+ "200":
+ description: Response
+ schema:
+ type: string
+ summary: Generates audio from the input text.
+securityDefinitions:
+ BearerAuth:
+ in: header
+ name: Authorization
+ type: apiKey
+swagger: "2.0"
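
The definitions above pin down the request and response shapes for each route. As a quick sanity check of the `/v1/chat/completions` path (host taken from the `host:` field above; model name, prompt and penalty values are illustrative assumptions, not part of the spec), a minimal Go client could look like this sketch:

    package main

    import (
        "bytes"
        "encoding/json"
        "fmt"
        "io"
        "net/http"
    )

    func main() {
        // Fields mirror schema.OpenAIRequest from the definitions above.
        body, _ := json.Marshal(map[string]any{
            "model": "gpt-4",
            "messages": []map[string]string{
                {"role": "user", "content": "Hello!"},
            },
            "frequency_penalty": 0.5,
            "presence_penalty":  0.2,
        })
        resp, err := http.Post("http://localhost:8080/v1/chat/completions",
            "application/json", bytes.NewReader(body))
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        out, _ := io.ReadAll(resp.Body)
        fmt.Println(string(out)) // a schema.OpenAIResponse JSON document
    }
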
From a7fc89c2078f2c77e1a2e533c3faff59f9043ed6 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 29 Mar 2024 22:29:50 +0100
Subject: [PATCH 0066/2750] :arrow_up: Update ggerganov/whisper.cpp (#1927)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 440f5158..6c02b80e 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=fc366b807a17dc05813a6fcc13c8c4dfd442fa6a
+WHISPER_CPP_VERSION?=1e8f28c42a1472ae7c49d0502ea06e2f5bc29a69
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 93702e39d420a39c7e4afa9fcd925e9eeb10492c Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Fri, 29 Mar 2024 16:32:40 -0500
Subject: [PATCH 0067/2750] feat(build): adjust number of parallel make jobs
(#1915)
* feat(build): adjust number of parallel make jobs
* fix: update make on macOS via brew to support the --output-sync argument (see the sketch below)
* fix: cache grpc with version as part of key to improve validity of cache hits
* fix: use gmake for tests-apple to use the updated GNU make version
* fix: actually use the new make version for tests-apple
* feat: parallelize tests-extra
* feat: attempt to cache grpc build for docker images
* fix: don't quote GRPC version
* fix: don't cache go modules, we have limited cache space, better used elsewhere
* fix: release with the same version of go that we test with
* fix: don't fail on exporting cache layers
* fix: remove deprecated BUILD_GRPC docker arg from Makefile
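
A note on the `--jobs`/`--output-sync=target` pairing used throughout this patch: once recipes run in parallel, their log lines interleave unless make buffers each target's output and flushes it as a single block. A conceptual Go illustration of that buffering idea (not project code; the targets are made up):

    package main

    import (
        "bytes"
        "fmt"
        "sync"
    )

    func main() {
        var wg sync.WaitGroup
        var mu sync.Mutex // stands in for make's per-target output lock
        for _, target := range []string{"backend-a", "backend-b", "backend-c"} {
            wg.Add(1)
            go func(t string) { // each goroutine plays one parallel job
                defer wg.Done()
                var buf bytes.Buffer // buffer the whole recipe output first...
                fmt.Fprintf(&buf, "building %s\n%s built\n", t, t)
                mu.Lock() // ...then emit it as one uninterrupted block
                defer mu.Unlock()
                fmt.Print(buf.String())
            }(target)
        }
        wg.Wait()
    }
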
---
.github/workflows/image-pr.yml | 13 ++++++++---
.github/workflows/image.yml | 26 ++++++++++++++++++++--
.github/workflows/image_build.yml | 37 ++++++++++++++++++++++++++++---
.github/workflows/release.yaml | 20 +++++++++++------
.github/workflows/test-extra.yml | 36 +++++++++++++++---------------
.github/workflows/test.yml | 23 ++++++++++++-------
Dockerfile | 37 ++++++++++++++++++++++++-------
Makefile | 6 ++++-
8 files changed, 148 insertions(+), 50 deletions(-)
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index aa59188c..b703b16d 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,7 +22,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
- makeflags: "--jobs=3 --output-sync=target"
+ makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -42,6 +42,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -52,6 +53,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -60,6 +62,7 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -68,6 +71,7 @@ jobs:
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
@@ -81,7 +85,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
- makeflags: "--jobs=3 --output-sync=target"
+ makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -98,6 +102,7 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=5 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -106,6 +111,7 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -115,4 +121,5 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- base-image: "ubuntu:22.04"
\ No newline at end of file
+ base-image: "ubuntu:22.04"
+ makeflags: "--jobs=5 --output-sync=target"
\ No newline at end of file
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 40deb0ec..79a38fc5 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -27,7 +27,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
aio: ${{ matrix.aio }}
- makeflags: "--jobs=3 --output-sync=target"
+ makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -49,6 +49,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -57,6 +58,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -67,6 +69,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -77,6 +80,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -88,6 +92,7 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -99,6 +104,7 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
@@ -108,6 +114,7 @@ jobs:
image-type: 'extras'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -117,6 +124,7 @@ jobs:
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -125,6 +133,7 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -134,6 +143,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16"
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -143,6 +153,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
+ makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
@@ -152,6 +163,7 @@ jobs:
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -160,6 +172,7 @@ jobs:
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -168,6 +181,7 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -176,6 +190,7 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -184,6 +199,7 @@ jobs:
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -192,6 +208,7 @@ jobs:
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
+ makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
@@ -207,7 +224,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
- makeflags: "--jobs=3 --output-sync=target"
+ makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -225,6 +242,7 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
+ makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -235,6 +253,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
+ makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -245,6 +264,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
+ makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -255,6 +275,7 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -265,3 +286,4 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
+ makeflags: "--jobs=5 --output-sync=target"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 659f85de..d07df441 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -49,7 +49,7 @@ on:
makeflags:
description: 'Make Flags'
required: false
- default: ''
+ default: '--jobs=3 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
@@ -79,6 +79,7 @@ jobs:
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v4
+
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'
run: |
@@ -120,6 +121,7 @@ jobs:
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
+
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
@@ -134,6 +136,7 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
+
- name: Docker meta AIO (quay.io)
if: inputs.aio != ''
id: meta_aio
@@ -147,6 +150,7 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
+
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
id: meta_aio_dockerhub
@@ -160,6 +164,7 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
+
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
@@ -184,6 +189,25 @@ jobs:
username: ${{ secrets.quayUsername }}
password: ${{ secrets.quayPassword }}
+ - name: Cache GRPC
+ uses: docker/build-push-action@v5
+ with:
+ builder: ${{ steps.buildx.outputs.name }}
+ build-args: |
+ IMAGE_TYPE=${{ inputs.image-type }}
+ BASE_IMAGE=${{ inputs.base-image }}
+ MAKEFLAGS=${{ inputs.makeflags }}
+ GRPC_VERSION=v1.58.0
+ context: .
+ file: ./Dockerfile
+ cache-from: type=gha
+ cache-to: type=gha,ignore-error=true
+ target: grpc
+ platforms: ${{ inputs.platforms }}
+ push: false
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
+
- name: Build and push
uses: docker/build-push-action@v5
with:
@@ -198,18 +222,20 @@ jobs:
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
+ cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- -
- name: Inspect image
+
+ - name: Inspect image
if: github.event_name != 'pull_request'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker image inspect localai/localai:${{ steps.meta.outputs.version }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v5
@@ -217,12 +243,14 @@ jobs:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+ MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}
+
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v5
@@ -230,15 +258,18 @@ jobs:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
+ MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
+
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
+
- name: job summary(AIO)
if: inputs.aio != ''
run: |
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 6ac816ee..1d749189 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -2,6 +2,9 @@ name: Build and Release
on: push
+env:
+ GRPC_VERSION: v1.58.0
+
permissions:
contents: write
@@ -32,7 +35,8 @@ jobs:
submodules: true
- uses: actions/setup-go@v4
with:
- go-version: '>=1.21.0'
+ go-version: '1.21.x'
+ cache: false
- name: Dependencies
run: |
sudo apt-get update
@@ -54,17 +58,17 @@ jobs:
uses: actions/cache@v3
with:
path: grpc
- key: ${{ runner.os }}-grpc
+ key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
- git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+ git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
- ../.. && sudo make -j12
+ ../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
run: |
- cd grpc && cd cmake/build && sudo make -j12 install
+ cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- name: Build
id: build
env:
@@ -98,7 +102,8 @@ jobs:
submodules: true
- uses: actions/setup-go@v4
with:
- go-version: '>=1.21.0'
+ go-version: '1.21.x'
+ cache: false
- name: Dependencies
run: |
sudo apt-get install -y --no-install-recommends libopencv-dev
@@ -135,7 +140,8 @@ jobs:
submodules: true
- uses: actions/setup-go@v4
with:
- go-version: '>=1.21.0'
+ go-version: '1.21.x'
+ cache: false
- name: Dependencies
run: |
brew install protobuf grpc
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 5f61835d..6f92c806 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -40,8 +40,8 @@ jobs:
- name: Test transformers
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/transformers
- make -C backend/python/transformers test
+ make --jobs=5 --output-sync=target -C backend/python/transformers
+ make --jobs=5 --output-sync=target -C backend/python/transformers test
tests-sentencetransformers:
runs-on: ubuntu-latest
@@ -69,8 +69,8 @@ jobs:
- name: Test sentencetransformers
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/sentencetransformers
- make -C backend/python/sentencetransformers test
+ make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
+ make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
tests-diffusers:
runs-on: ubuntu-latest
@@ -98,8 +98,8 @@ jobs:
- name: Test diffusers
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/diffusers
- make -C backend/python/diffusers test
+ make --jobs=5 --output-sync=target -C backend/python/diffusers
+ make --jobs=5 --output-sync=target -C backend/python/diffusers test
tests-transformers-musicgen:
@@ -128,8 +128,8 @@ jobs:
- name: Test transformers-musicgen
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/transformers-musicgen
- make -C backend/python/transformers-musicgen test
+ make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+ make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
@@ -159,8 +159,8 @@ jobs:
- name: Test petals
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/petals
- make -C backend/python/petals test
+ make --jobs=5 --output-sync=target -C backend/python/petals
+ make --jobs=5 --output-sync=target -C backend/python/petals test
@@ -230,8 +230,8 @@ jobs:
# - name: Test bark
# run: |
# export PATH=$PATH:/opt/conda/bin
- # make -C backend/python/bark
- # make -C backend/python/bark test
+ # make --jobs=5 --output-sync=target -C backend/python/bark
+ # make --jobs=5 --output-sync=target -C backend/python/bark test
  # Below tests need GPU. Commented out for now
@@ -260,8 +260,8 @@ jobs:
# - name: Test vllm
# run: |
# export PATH=$PATH:/opt/conda/bin
- # make -C backend/python/vllm
- # make -C backend/python/vllm test
+ # make --jobs=5 --output-sync=target -C backend/python/vllm
+ # make --jobs=5 --output-sync=target -C backend/python/vllm test
tests-vallex:
runs-on: ubuntu-latest
steps:
@@ -286,8 +286,8 @@ jobs:
- name: Test vall-e-x
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/vall-e-x
- make -C backend/python/vall-e-x test
+ make --jobs=5 --output-sync=target -C backend/python/vall-e-x
+ make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
tests-coqui:
runs-on: ubuntu-latest
@@ -313,5 +313,5 @@ jobs:
- name: Test coqui
run: |
export PATH=$PATH:/opt/conda/bin
- make -C backend/python/coqui
- make -C backend/python/coqui test
+ make --jobs=5 --output-sync=target -C backend/python/coqui
+ make --jobs=5 --output-sync=target -C backend/python/coqui test
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 203aeeca..95d10862 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,6 +9,9 @@ on:
tags:
- '*'
+env:
+ GRPC_VERSION: v1.58.0
+
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
@@ -60,6 +63,7 @@ jobs:
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
+ cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
@@ -91,20 +95,20 @@ jobs:
uses: actions/cache@v3
with:
path: grpc
- key: ${{ runner.os }}-grpc
+ key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
- git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+ git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
- ../.. && sudo make -j12
+ ../.. && sudo make --jobs 5
- name: Install gRPC
run: |
- cd grpc && cd cmake/build && sudo make -j12 install
+ cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
- GO_TAGS="stablediffusion tts" make test
+ GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
@@ -151,7 +155,7 @@ jobs:
submodules: true
- name: Build images
run: |
- docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
+ docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
@@ -176,17 +180,20 @@ jobs:
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
+ cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
- brew install protobuf grpc
+ brew install protobuf grpc make
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
- BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
+ # Used to run the newer GNU Make version from brew that supports --output-sync
+ export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
+ BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
diff --git a/Dockerfile b/Dockerfile
index 8725e76d..5fb6230c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -90,11 +90,35 @@ RUN if [ ! -e /usr/bin/python ]; then \
###################################
###################################
+FROM ${BASE_IMAGE} as grpc
+
+ARG MAKEFLAGS
+ARG GRPC_VERSION=v1.58.0
+
+ENV MAKEFLAGS=${MAKEFLAGS}
+
+WORKDIR /build
+
+RUN apt-get update && \
+ apt-get install -y g++ cmake git && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
+
+RUN cd grpc && \
+ mkdir -p cmake/build && \
+ cd cmake/build && \
+ cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
+ make
+
+###################################
+###################################
+
FROM requirements-${IMAGE_TYPE} as builder
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
-ARG BUILD_GRPC=true
ARG MAKEFLAGS
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
@@ -121,12 +145,9 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
-RUN if [ "${BUILD_GRPC}" = "true" ]; then \
- git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
- -DgRPC_BUILD_TESTS=OFF \
- ../.. && make install \
- ; fi
+COPY --from=grpc /build/grpc ./grpc/
+
+RUN cd /build/grpc/cmake/build && make install
# Rebuild with defaults backends
RUN make build
@@ -179,7 +200,7 @@ WORKDIR /build
COPY . .
COPY --from=builder /build/sources ./sources/
-COPY --from=builder /build/grpc ./grpc/
+COPY --from=grpc /build/grpc ./grpc/
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
diff --git a/Makefile b/Makefile
index 6c02b80e..03764d86 100644
--- a/Makefile
+++ b/Makefile
@@ -355,7 +355,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
- docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+ docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -564,6 +564,7 @@ docker:
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
+ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .
@@ -571,6 +572,7 @@ docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
@@ -582,6 +584,7 @@ docker-image-intel:
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
+ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
@@ -589,6 +592,7 @@ docker-image-intel-xpu:
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
+ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
.PHONY: swagger
From 92fbdfd06f0bf66f27e132f576fcb58cf9c8b9ef Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 29 Mar 2024 22:48:58 +0100
Subject: [PATCH 0068/2750] feat(swagger): update (#1929)
---
swagger/docs.go | 8 ++++----
swagger/swagger.json | 8 ++++----
swagger/swagger.yaml | 8 ++++----
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/swagger/docs.go b/swagger/docs.go
index a922fa2e..0f5c2c47 100644
--- a/swagger/docs.go
+++ b/swagger/docs.go
@@ -11,12 +11,12 @@ const docTemplate = `{
"description": "{{escape .Description}}",
"title": "{{.Title}}",
"contact": {
- "name": "OpenAI Support",
- "url": "https://help.openai.com/"
+ "name": "LocalAI",
+ "url": "https://localai.io"
},
"license": {
"name": "MIT",
- "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE"
+ "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE"
},
"version": "{{.Version}}"
},
@@ -789,7 +789,7 @@ var SwaggerInfo = &swag.Spec{
BasePath: "/",
Schemes: []string{},
Title: "LocalAI API",
- Description: "The OpenAI REST API.",
+ Description: "The LocalAI REST API.",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
LeftDelim: "{{",
diff --git a/swagger/swagger.json b/swagger/swagger.json
index c9088d93..37dbec47 100644
--- a/swagger/swagger.json
+++ b/swagger/swagger.json
@@ -1,15 +1,15 @@
{
"swagger": "2.0",
"info": {
- "description": "The OpenAI REST API.",
+ "description": "The LocalAI Rest API.",
"title": "LocalAI API",
"contact": {
- "name": "OpenAI Support",
- "url": "https://help.openai.com/"
+ "name": "LocalAI",
+ "url": "https://localai.io"
},
"license": {
"name": "MIT",
- "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE"
+ "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE"
},
"version": "2.0.0"
},
diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
index b33e1fab..91180359 100644
--- a/swagger/swagger.yaml
+++ b/swagger/swagger.yaml
@@ -368,12 +368,12 @@ definitions:
host: localhost:8080
info:
contact:
- name: OpenAI Support
- url: https://help.openai.com/
- description: The OpenAI REST API.
+ name: LocalAI
+ url: https://localai.io
+ description: The LocalAI REST API.
license:
name: MIT
- url: https://github.com/openai/openai-openapi/blob/master/LICENSE
+ url: https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
title: LocalAI API
version: 2.0.0
paths:
From bcdc83b46d90dfdaecbcba17a5a8a45702b6cf71 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 29 Mar 2024 23:00:06 +0100
Subject: [PATCH 0069/2750] Update quickstart.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/getting-started/quickstart.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 716fe154..c56dced5 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -112,7 +112,7 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca
## Try it out
-LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [All-in-one Images]({{%relref "docs/integrations" %}}) ). However, you can test out the API endpoints using `curl`.
+LocalAI does not ship a webui by default; you can use third-party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}})). You can, however, test the API endpoints directly with `curl`; a few examples follow below.
### Text Generation
From 2bba62ca4daf407bd6c7b141f3a0f9b2e30b7599 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 29 Mar 2024 23:52:01 +0100
Subject: [PATCH 0070/2750] :arrow_up: Update ggerganov/llama.cpp (#1928)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 03764d86..1a0d97e1 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=5106ef482c65ac60ac14da9a68c7b37bca4c6993
+CPPLLAMA_VERSION?=ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From eab4a91a9bdcd2e6000234860b578437e459da5b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 30 Mar 2024 12:04:32 +0100
Subject: [PATCH 0071/2750] fix(aio): correctly detect intel systems (#1931)
Also rename SIZE to PROFILE
---
aio/entrypoint.sh | 119 +++++++++++++---------
docs/content/docs/reference/aio-images.md | 2 +-
2 files changed, 72 insertions(+), 49 deletions(-)
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index d04e5642..a2e040fa 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -5,54 +5,77 @@ echo "===> LocalAI All-in-One (AIO) container starting..."
GPU_ACCELERATION=false
GPU_VENDOR=""
+function check_intel() {
+ if lspci | grep -E 'VGA|3D' | grep -iq intel; then
+ echo "Intel GPU detected"
+ if [ -d /opt/intel ]; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=intel
+ else
+ echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+ fi
+ fi
+}
+
+function check_nvidia_wsl() {
+ if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
+ # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
+ # Make sure the container was run with `--gpus all` as the only required parameter
+ echo "NVIDIA GPU detected via WSL2"
+ # nvidia-smi should be installed in the container
+ if nvidia-smi; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=nvidia
+ else
+ echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
+ fi
+ fi
+}
+
+function check_amd() {
+ if lspci | grep -E 'VGA|3D' | grep -iq amd; then
+ echo "AMD GPU detected"
+ # Check if ROCm is installed
+ if [ -d /opt/rocm ]; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=amd
+ else
+ echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+ fi
+ fi
+}
+
+function check_nvidia() {
+ if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
+ echo "NVIDIA GPU detected"
+ # nvidia-smi should be installed in the container
+ if nvidia-smi; then
+ GPU_ACCELERATION=true
+ GPU_VENDOR=nvidia
+ else
+ echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
+ fi
+ fi
+}
+
+function check_metal() {
+ if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
+ echo "Apple Metal supported GPU detected"
+ GPU_ACCELERATION=true
+ GPU_VENDOR=apple
+ fi
+}
+
function detect_gpu() {
case "$(uname -s)" in
Linux)
- if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
- echo "NVIDIA GPU detected"
- # nvidia-smi should be installed in the container
- if nvidia-smi; then
- GPU_ACCELERATION=true
- GPU_VENDOR=nvidia
- else
- echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
- fi
- elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
- echo "AMD GPU detected"
- # Check if ROCm is installed
- if [ -d /opt/rocm ]; then
- GPU_ACCELERATION=true
- GPU_VENDOR=amd
- else
- echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
- fi
- elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
- echo "Intel GPU detected"
- if [ -d /opt/intel ]; then
- GPU_ACCELERATION=true
- GPU_VENDOR=intel
- else
- echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
- fi
- elif lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
- # We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi
- # Make sure the container was run with `--gpus all` as the only required parameter
- echo "NVIDIA GPU detected via WSL2"
- # nvidia-smi should be installed in the container
- if nvidia-smi; then
- GPU_ACCELERATION=true
- GPU_VENDOR=nvidia
- else
- echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
- fi
- fi
+ check_nvidia
+ check_amd
+ check_intel
+ check_nvidia_wsl
;;
Darwin)
- if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
- echo "Apple Metal supported GPU detected"
- GPU_ACCELERATION=true
- GPU_VENDOR=apple
- fi
+ check_metal
;;
esac
}
@@ -96,8 +119,8 @@ function check_vars() {
exit 1
fi
- if [ -z "$SIZE" ]; then
- echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
+ if [ -z "$PROFILE" ]; then
+ echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
exit 1
fi
}
@@ -105,11 +128,11 @@ function check_vars() {
detect_gpu
detect_gpu_size
-SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
-export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"
+PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
+export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
-echo "Starting LocalAI with the following models: $MODELS"
+echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
/build/entrypoint.sh "$@"
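
The refactor above turns one long if/elif chain into small per-vendor probes that run in sequence, each enabling acceleration only when both the hardware check and the driver check pass. A rough transliteration of that structure into Go (illustrative only; the real implementation is the shell above, and the probes here are stubs):

    package main

    import "fmt"

    var (
        gpuAcceleration = false
        gpuVendor       = ""
    )

    // probe mirrors the check_nvidia/check_amd/check_intel/check_nvidia_wsl
    // split: detect the hardware, then verify the driver before enabling.
    func probe(vendor string, detect, driverOK func() bool) {
        if !detect() {
            return
        }
        fmt.Println(vendor, "GPU detected")
        if driverOK() {
            gpuAcceleration = true
            gpuVendor = vendor
        } else {
            fmt.Println(vendor, "GPU detected, but drivers are missing; acceleration disabled")
        }
    }

    func main() {
        // Stubs standing in for `lspci | grep` and `[ -d /opt/... ]`;
        // here we pretend only an Intel GPU is present.
        has := func(s string) func() bool { return func() bool { return s == "intel" } }
        ok := func(b bool) func() bool { return func() bool { return b } }

        probe("nvidia", has("nvidia"), ok(true))
        probe("amd", has("amd"), ok(true))
        probe("intel", has("intel"), ok(true))
        fmt.Println(gpuAcceleration, gpuVendor) // true intel
    }
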
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index c2cb57ba..40f01f06 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -46,7 +46,7 @@ The AIO Images are inheriting the same environment variables as the base images
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
-| `SIZE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
+| `PROFILE` | Auto-detected | The model size profile to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
From 61e5e6bc36adb51b3ba29d27f5208222a8d69db3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 30 Mar 2024 12:04:41 +0100
Subject: [PATCH 0072/2750] fix(swagger): do not specify a host (#1930)
This way, requests are sent to whichever host the client used to reach
the API, instead of the previously hard-coded localhost:8080.
---
core/http/api.go | 1 -
swagger/docs.go | 2 +-
swagger/swagger.json | 1 -
swagger/swagger.yaml | 1 -
4 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/core/http/api.go b/core/http/api.go
index ff413b0a..af38512a 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -51,7 +51,6 @@ func readAuthHeader(c *fiber.Ctx) string {
// @contact.url https://localai.io
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
-// @host localhost:8080
// @BasePath /
// @securityDefinitions.apikey BearerAuth
// @in header
diff --git a/swagger/docs.go b/swagger/docs.go
index 0f5c2c47..e0199673 100644
--- a/swagger/docs.go
+++ b/swagger/docs.go
@@ -785,7 +785,7 @@ const docTemplate = `{
// SwaggerInfo holds exported Swagger Info so clients can modify it
var SwaggerInfo = &swag.Spec{
Version: "2.0.0",
- Host: "localhost:8080",
+ Host: "",
BasePath: "/",
Schemes: []string{},
Title: "LocalAI API",
diff --git a/swagger/swagger.json b/swagger/swagger.json
index 37dbec47..4d7102c4 100644
--- a/swagger/swagger.json
+++ b/swagger/swagger.json
@@ -13,7 +13,6 @@
},
"version": "2.0.0"
},
- "host": "localhost:8080",
"basePath": "/",
"paths": {
"/v1/assistants": {
diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
index 91180359..86caff8a 100644
--- a/swagger/swagger.yaml
+++ b/swagger/swagger.yaml
@@ -365,7 +365,6 @@ definitions:
type:
type: string
type: object
-host: localhost:8080
info:
contact:
name: LocalAI
From 957f428fd5adacb12bc094ddfdc5f3c784dadbed Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 30 Mar 2024 19:02:07 +0100
Subject: [PATCH 0073/2750] fix(tools): correctly render tools response in
templates (#1932)
* fix(tools): allow to correctly display both Functions and Tools
* models(hermes-2-pro): correctly display function results
---
aio/gpu-8g/text-to-text.yaml | 21 ++++-----
aio/intel/text-to-text.yaml | 21 ++++-----
core/http/endpoints/openai/chat.go | 55 +++++++++++++++--------
embedded/models/hermes-2-pro-mistral.yaml | 21 ++++-----
4 files changed, 64 insertions(+), 54 deletions(-)
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index c6f26c07..1a67169b 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -3,30 +3,27 @@ mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-roles:
- assistant_function_call: assistant
- function: tool
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
- {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
- {{ if eq .RoleName "function" }}<tool_response>{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{ if .FunctionCall }}<tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if eq .RoleName "assistant_function_call" }}</tool_call> {{end}}
- {{ if eq .RoleName "function" }}</tool_response> {{end}}
+ {{ if .FunctionCall }}</tool_call> {{end}}
+ {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
- You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
-
- Use the following pydantic model json schema for each tool call you will make:
- {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
{'arguments': <args-dict>, 'name': <function-name>}
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index ef36b562..0577d19b 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -4,30 +4,27 @@ f16: false
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-roles:
- assistant_function_call: assistant
- function: tool
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
- {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
- {{ if eq .RoleName "function" }}<tool_response>{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{ if .FunctionCall }}<tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if eq .RoleName "assistant_function_call" }}</tool_call> {{end}}
- {{ if eq .RoleName "function" }}</tool_response> {{end}}
+ {{ if .FunctionCall }}</tool_call> {{end}}
+ {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
- You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
-
- Use the following pydantic model json schema for each tool call you will make:
- {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
{'arguments': <args-dict>, 'name': <function-name>}
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index c2e22962..f5f03eb4 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -236,7 +236,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
- if i.FunctionCall != nil && i.Role == "assistant" {
+ if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
@@ -246,6 +246,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
+ fcall := i.FunctionCall
+ if len(i.ToolCalls) > 0 {
+ fcall = i.ToolCalls
+ }
+
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
@@ -253,7 +258,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
Role: r,
RoleName: role,
Content: i.StringContent,
- FunctionCall: i.FunctionCall,
+ FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(input.Messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
@@ -271,35 +276,49 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
content = templatedChatMessage
}
}
+
+ marshalAnyRole := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + fmt.Sprint(r, " ", string(j))
+ } else {
+ content = fmt.Sprint(r, " ", string(j))
+ }
+ }
+ }
+ marshalAny := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + string(j)
+ } else {
+ content = string(j)
+ }
+ }
+ }
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
+
if i.FunctionCall != nil {
- j, err := json.Marshal(i.FunctionCall)
- if err == nil {
- if contentExists {
- content += "\n" + fmt.Sprint(r, " ", string(j))
- } else {
- content = fmt.Sprint(r, " ", string(j))
- }
- }
+ marshalAnyRole(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAnyRole(i.ToolCalls)
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
- j, err := json.Marshal(i.FunctionCall)
- if err == nil {
- if contentExists {
- content += "\n" + string(j)
- } else {
- content = string(j)
- }
- }
+ marshalAny(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAny(i.ToolCalls)
}
}
// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
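
A self-contained sketch of the fallback path added above, with the surrounding handler state reduced to simple stand-ins (the real code operates on schema.Message values inside ChatEndpoint):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    func main() {
        content := ""
        contentExists := false
        r := "assistant" // the resolved role prefix

        // Same shape as the closure in the diff: marshal the call and
        // prefix it with the role string.
        marshalAnyRole := func(f any) {
            j, err := json.Marshal(f)
            if err == nil {
                if contentExists {
                    content += "\n" + fmt.Sprint(r, " ", string(j))
                } else {
                    content = fmt.Sprint(r, " ", string(j))
                }
            }
        }

        toolCalls := []map[string]any{
            {"id": "call_1", "type": "function",
                "function": map[string]string{"name": "get_weather", "arguments": `{"city":"Rome"}`}},
        }
        marshalAnyRole(toolCalls)
        fmt.Println(content) // assistant [{"function":{...},"id":"call_1","type":"function"}]
    }
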
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 84510d2a..108216f5 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -3,30 +3,27 @@ mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-roles:
- assistant_function_call: assistant
- function: tool
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
- {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
- {{ if eq .RoleName "function" }}<tool_response>{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{ if .FunctionCall }}<tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if eq .RoleName "assistant_function_call" }}</tool_call> {{end}}
- {{ if eq .RoleName "function" }}</tool_response> {{end}}
+ {{ if .FunctionCall }}</tool_call> {{end}}
+ {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
- You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
-
- Use the following pydantic model json schema for each tool call you will make:
- {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
{'arguments': <args-dict>, 'name': <function-name>}
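
These `chat_message` templates are rendered with Go's text/template engine plus a `toJson` helper. A trimmed, runnable sketch of that rendering (the template string and data map below are simplified stand-ins for the YAML above and for model.ChatMessageTemplateData):

    package main

    import (
        "encoding/json"
        "os"
        "text/template"
    )

    const chatMessage = "<|im_start|>{{.RoleName}}\n" +
        "{{if .FunctionCall}}<tool_call>{{toJson .FunctionCall}}</tool_call>{{end}}\n" +
        "<|im_end|>\n"

    func main() {
        t := template.Must(template.New("chat_message").
            Funcs(template.FuncMap{"toJson": func(v any) string {
                b, _ := json.Marshal(v)
                return string(b)
            }}).Parse(chatMessage))

        // An assistant message carrying a tool call now renders inside
        // <tool_call> tags, keyed off .FunctionCall rather than a custom role.
        _ = t.Execute(os.Stdout, map[string]any{
            "RoleName":     "assistant",
            "FunctionCall": map[string]string{"name": "get_weather", "arguments": `{"city":"Rome"}`},
        })
    }
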
From 831efa8893b6fe9b983ee1c28b74c1777da8b0cb Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 31 Mar 2024 00:27:16 +0100
Subject: [PATCH 0074/2750] :arrow_up: Update ggerganov/whisper.cpp (#1933)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 1a0d97e1..4561ea15 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=1e8f28c42a1472ae7c49d0502ea06e2f5bc29a69
+WHISPER_CPP_VERSION?=ac283dbce7d42735e3ed985329037bf23fe180aa
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From 784657a652b152bea211d42a0f7b43c29ab4cad3 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 31 Mar 2024 00:27:38 +0100
Subject: [PATCH 0075/2750] :arrow_up: Update ggerganov/llama.cpp (#1934)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 4561ea15..474171bb 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
+CPPLLAMA_VERSION?=37e7854c104301c5b5323ccc40e07699f3a62c3e
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 35290e146b8b575cd691c844dd611ead3c111c0b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 31 Mar 2024 13:04:09 +0200
Subject: [PATCH 0076/2750] fix(grammar): respect JSONmode and grammar from
user input (#1935)
* fix(grammar): Fix JSON mode and custom grammar
* tests(aio): add jsonmode test
* tests(aio): add functioncall test
* fix(aio): use hermes-2-pro-mistral as llm for CPU profile
* add phi-2-orange
---
aio/cpu/text-to-text.yaml | 31 ++++++++--
core/http/endpoints/openai/chat.go | 2 +
core/http/endpoints/openai/completion.go | 2 +
embedded/models/phi-2-orange.yaml | 30 +++++++++
tests/e2e-aio/e2e_test.go | 79 +++++++++++++++++++++++-
5 files changed, 139 insertions(+), 5 deletions(-)
create mode 100644 embedded/models/phi-2-orange.yaml
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 4fd88500..aeb3c842 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,25 +1,48 @@
name: gpt-4
mmap: true
parameters:
- model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{ if .FunctionCall }}<tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+ {{ if .FunctionCall }}</tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+ For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+ <tool_call>
+ {'arguments': <args-dict>, 'name': <function-name>}
+ </tool_call>
+ <|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
-context_size: 2048
+context_size: 4096
f16: true
stopwords:
- <|im_end|>
+- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "phi-2-chat",
+ "model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index f5f03eb4..837b6e12 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index a67f0993..69923475 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream {
diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml
new file mode 100644
index 00000000..9207d283
--- /dev/null
+++ b/embedded/models/phi-2-orange.yaml
@@ -0,0 +1,30 @@
+name: phi-2-chat
+mmap: true
+parameters:
+ model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
+
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ <|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+
+description: |
+ This model is a chatbot that can be used for general conversation.
+ [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "phi-2-chat",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
index c52d789e..8fcd1280 100644
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -2,6 +2,7 @@ package e2e_test
import (
"context"
+ "encoding/json"
"fmt"
"io"
"net/http"
@@ -9,8 +10,8 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
-
"github.com/sashabaranov/go-openai"
+ "github.com/sashabaranov/go-openai/jsonschema"
)
var _ = Describe("E2E test", func() {
@@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
})
})
+
+ Context("function calls", func() {
+ It("correctly invoke", func() {
+ params := jsonschema.Definition{
+ Type: jsonschema.Object,
+ Properties: map[string]jsonschema.Definition{
+ "location": {
+ Type: jsonschema.String,
+ Description: "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {
+ Type: jsonschema.String,
+ Enum: []string{"celsius", "fahrenheit"},
+ },
+ },
+ Required: []string{"location"},
+ }
+
+ f := openai.FunctionDefinition{
+ Name: "get_current_weather",
+ Description: "Get the current weather in a given location",
+ Parameters: params,
+ }
+ t := openai.Tool{
+ Type: openai.ToolTypeFunction,
+ Function: &f,
+ }
+
+ dialogue := []openai.ChatCompletionMessage{
+ {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
+ }
+ resp, err := client.CreateChatCompletion(context.TODO(),
+ openai.ChatCompletionRequest{
+ Model: openai.GPT4,
+ Messages: dialogue,
+ Tools: []openai.Tool{t},
+ },
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ msg := resp.Choices[0].Message
+ Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls))
+ Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name))
+ Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments))
+ })
+ })
+ Context("json", func() {
+ It("correctly", func() {
+ model := "gpt-4"
+
+ req := openai.ChatCompletionRequest{
+ ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
+ Model: model,
+ Messages: []openai.ChatCompletionMessage{
+ {
+
+ Role: "user",
+ Content: "An animal with 'name', 'gender' and 'legs' fields",
+ },
+ },
+ }
+
+ resp, err := client.CreateChatCompletion(context.TODO(), req)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ var i map[string]interface{}
+ err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(i).To(HaveKey("name"))
+ Expect(i).To(HaveKey("gender"))
+ Expect(i).To(HaveKey("legs"))
+ })
+ })
+
Context("images", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),
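The e2e tests above double as client documentation for the fixed JSON mode. Reduced to a standalone program (the base URL and the gpt-4 model name assume the AIO defaults configured earlier, and LocalAI accepts an empty API key by default), a JSON-mode request looks roughly like this:

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"

	"github.com/sashabaranov/go-openai"
)

func main() {
	cfg := openai.DefaultConfig("") // no API key needed by default
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{
		Model:          "gpt-4", // the AIO profile maps this name onto the local model
		ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "An animal with 'name', 'gender' and 'legs' fields"},
		},
	})
	if err != nil {
		panic(err)
	}

	// With JSON mode the reply must parse as a single JSON object.
	var out map[string]interface{}
	if err := json.Unmarshal([]byte(resp.Choices[0].Message.Content), &out); err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```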
From 3c778b538aee121543ddaeb334cbb7f0e4790d98 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 31 Mar 2024 13:06:41 +0200
Subject: [PATCH 0077/2750] Update phi-2-orange.yaml
Signed-off-by: Ettore Di Giacinto
---
embedded/models/phi-2-orange.yaml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml
index 9207d283..838909c9 100644
--- a/embedded/models/phi-2-orange.yaml
+++ b/embedded/models/phi-2-orange.yaml
@@ -1,4 +1,4 @@
-name: phi-2-chat
+name: phi-2-orange
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
@@ -25,6 +25,6 @@ description: |
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "phi-2-chat",
+ "model": "phi-2-orange",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
From 66f90f8dc1cb49d8926cfd5377b9409b4e8380e9 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 1 Apr 2024 08:59:23 +0200
Subject: [PATCH 0078/2750] :arrow_up: Update ggerganov/llama.cpp (#1937)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 474171bb..2f80a121 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=37e7854c104301c5b5323ccc40e07699f3a62c3e
+CPPLLAMA_VERSION?=c50a82ce0f71558cbb8e555146ba124251504b38
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From ebb1fcedea2f41292d0ce3e294f5df2375c69a0a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 1 Apr 2024 11:48:35 +0200
Subject: [PATCH 0079/2750] fix(hermes-2-pro-mistral): add stopword for
toolcall (#1939)
Signed-off-by: Ettore Di Giacinto
---
aio/cpu/text-to-text.yaml | 1 +
aio/gpu-8g/text-to-text.yaml | 1 +
aio/intel/text-to-text.yaml | 1 +
embedded/models/hermes-2-pro-mistral.yaml | 1 +
4 files changed, 4 insertions(+)
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index aeb3c842..d30f403e 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -41,6 +41,7 @@ f16: true
stopwords:
- <|im_end|>
- <dummy32000>
+- "\n</tool_call>"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 1a67169b..1645a257 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -41,6 +41,7 @@ f16: true
stopwords:
- <|im_end|>
- <dummy32000>
+- "\n</tool_call>"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index 0577d19b..3f3d2c39 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -40,6 +40,7 @@ template:
context_size: 4096
stopwords:
- <|im_end|>
+- "\n</tool_call>"
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 108216f5..3792be78 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -41,6 +41,7 @@ f16: true
stopwords:
- <|im_end|>
- <dummy32000>
+- "\n</tool_call>"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "hermes-2-pro-mistral",
From e8f02c083f03b04ffc58eb8ba1f093c9227be0a5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 1 Apr 2024 19:39:54 +0200
Subject: [PATCH 0080/2750] fix(functions): respect when selected from string
(#1940)
* fix(functions): respect when selected from string
* fix(toolschoice): decode both string and objects
---
core/config/backend_config.go | 7 ++++++-
core/http/endpoints/openai/request.go | 9 ++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 32e10a17..db9c6665 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -185,7 +185,12 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
}
func (c *BackendConfig) FunctionToCall() string {
- return c.functionCallNameString
+ if c.functionCallNameString != "" &&
+ c.functionCallNameString != "none" && c.functionCallNameString != "auto" {
+ return c.functionCallNameString
+ }
+
+ return c.functionCallString
}
func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index 1f845c6f..c9981204 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -146,7 +146,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
if input.ToolsChoice != nil {
var toolChoice grammar.Tool
- json.Unmarshal([]byte(input.ToolsChoice.(string)), &toolChoice)
+
+ switch content := input.ToolsChoice.(type) {
+ case string:
+ _ = json.Unmarshal([]byte(content), &toolChoice)
+ case map[string]interface{}:
+ dat, _ := json.Marshal(content)
+ _ = json.Unmarshal(dat, &toolChoice)
+ }
input.FunctionCall = map[string]interface{}{
"name": toolChoice.Function.Name,
}
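In the OpenAI API, tool_choice can be the string "none" or "auto", a serialized object, or a plain JSON object naming a function, which is why the handler now switches on the decoded Go type before unmarshalling. A self-contained sketch of the same technique, using an illustrative stand-in for the grammar.Tool type:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative stand-in for grammar.Tool.
type toolChoice struct {
	Type     string `json:"type"`
	Function struct {
		Name string `json:"name"`
	} `json:"function"`
}

func decodeToolChoice(v interface{}) toolChoice {
	var tc toolChoice
	switch content := v.(type) {
	case string:
		// "auto" and "none" are not JSON objects; Unmarshal then fails
		// and tc stays zero-valued, mirroring the patch's behaviour.
		_ = json.Unmarshal([]byte(content), &tc)
	case map[string]interface{}:
		// Already decoded by the request parser: round-trip through JSON.
		dat, _ := json.Marshal(content)
		_ = json.Unmarshal(dat, &tc)
	}
	return tc
}

func main() {
	obj := map[string]interface{}{
		"type":     "function",
		"function": map[string]interface{}{"name": "get_current_weather"},
	}
	fmt.Println(decodeToolChoice(obj).Function.Name) // get_current_weather
	fmt.Println(decodeToolChoice(`{"type":"function","function":{"name":"get_time"}}`).Function.Name)
}
```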
From 86bc5f1350c5841a2a7d029f5f53faf52d025fd1 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Tue, 2 Apr 2024 02:15:44 -0500
Subject: [PATCH 0081/2750] fix: use exec in entrypoint scripts to fix signal
handling (#1943)
---
aio/entrypoint.sh | 2 +-
entrypoint.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index a2e040fa..5fd8d9c2 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -135,4 +135,4 @@ check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
-/build/entrypoint.sh "$@"
+exec /build/entrypoint.sh "$@"
diff --git a/entrypoint.sh b/entrypoint.sh
index 05f67128..fb8417df 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -47,4 +47,4 @@ else
echo "@@@@@"
fi
-./local-ai "$@"
+exec ./local-ai "$@"
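Without exec, the shell stays alive as the container's main process: Docker delivers SIGTERM to the shell, most shells do not forward it to children, and local-ai is eventually killed hard after the grace period. With exec the server replaces the shell and receives the signal directly. A minimal Go sketch of the graceful-shutdown handler this enables (illustrative, not LocalAI's actual shutdown path):

```go
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// This only works if the process actually receives the signal,
	// hence `exec` in the entrypoint: it replaces the shell so the
	// container runtime signals the server itself.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)

	fmt.Println("serving; waiting for SIGTERM")
	<-sigs
	fmt.Println("shutting down cleanly")
}
```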
From 4d4d76114dc7c58f8e9504cb018138c311007824 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 2 Apr 2024 09:16:04 +0200
Subject: [PATCH 0082/2750] :arrow_up: Update ggerganov/llama.cpp (#1941)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 2f80a121..a98eac67 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=c50a82ce0f71558cbb8e555146ba124251504b38
+CPPLLAMA_VERSION?=f87f7b898651339fe173ddf016ca826163e899d8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 84e0dc3246fabd658ef8ba8ca983e8f5f36f3706 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 2 Apr 2024 15:38:00 +0200
Subject: [PATCH 0083/2750] fix(hermes-2-pro-mistral): correct stopwords
(#1947)
Signed-off-by: Ettore Di Giacinto
---
aio/cpu/text-to-text.yaml | 1 +
aio/gpu-8g/text-to-text.yaml | 1 +
aio/intel/text-to-text.yaml | 1 +
embedded/models/hermes-2-pro-mistral.yaml | 1 +
4 files changed, 4 insertions(+)
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index d30f403e..8a20109d 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -42,6 +42,7 @@ stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
+- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 1645a257..9502cdfe 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -42,6 +42,7 @@ stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
+- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index 3f3d2c39..d2316745 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -42,6 +42,7 @@ stopwords:
- <|im_end|>
- "\n</tool_call>"
- <dummy32000>
+- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 3792be78..eb75b97c 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -42,6 +42,7 @@ stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
+- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "hermes-2-pro-mistral",
From 9bc209ba731a6b5fafc5f6c646de563757b70ea3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 2 Apr 2024 19:25:32 +0200
Subject: [PATCH 0084/2750] fix(welcome): stable model list (#1949)
---
core/config/backend_config.go | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index db9c6665..9b227578 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -7,6 +7,7 @@ import (
"math/rand"
"os"
"path/filepath"
+ "sort"
"strings"
"sync"
@@ -455,6 +456,11 @@ func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
for _, v := range cl.configs {
res = append(res, v)
}
+
+ sort.SliceStable(res, func(i, j int) bool {
+ return res[i].Name < res[j].Name
+ })
+
return res
}
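Go intentionally randomizes map iteration order, so building the model list straight from the configs map reshuffled the welcome page on every request. Sorting the collected slice pins the order down; the pattern in isolation:

```go
package main

import (
	"fmt"
	"sort"
)

type backendConfig struct{ Name string } // illustrative subset of BackendConfig

func main() {
	// Map iteration order is random in Go, so collect first, then sort.
	configs := map[string]backendConfig{
		"a": {Name: "phi-2-orange"},
		"b": {Name: "gpt-4"},
		"c": {Name: "hermes-2-pro-mistral"},
	}

	res := make([]backendConfig, 0, len(configs))
	for _, v := range configs {
		res = append(res, v)
	}
	sort.SliceStable(res, func(i, j int) bool {
		return res[i].Name < res[j].Name
	})

	for _, c := range res {
		fmt.Println(c.Name) // deterministic: gpt-4, hermes-2-pro-mistral, phi-2-orange
	}
}
```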
From 89560ef87f5146a53cf3d0df704ee3eede88dd3f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 2 Apr 2024 19:25:46 +0200
Subject: [PATCH 0085/2750] fix(ci): manually tag latest images (#1948)
fix(ci): manually tag images
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/image.yml | 8 ++++++++
.github/workflows/image_build.yml | 21 +++++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 79a38fc5..0c708b1d 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -28,6 +28,7 @@ jobs:
base-image: ${{ matrix.base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
+ latest-image: ${{ matrix.latest-image }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -92,6 +93,7 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
+ latest-image: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
@@ -104,6 +106,7 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12"
+ latest-image: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
@@ -123,6 +126,7 @@ jobs:
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ latest-image: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
@@ -143,6 +147,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16"
+ latest-image: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
@@ -153,6 +158,7 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
+ latest-image: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
@@ -225,6 +231,7 @@ jobs:
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
makeflags: ${{ matrix.makeflags }}
+ latest-image: ${{ matrix.latest-image }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -242,6 +249,7 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
+ latest-image: 'latest-aio-cpu'
makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index d07df441..affa03bf 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -29,6 +29,10 @@ on:
description: 'Tag latest'
default: ''
type: string
+ latest-image:
+ description: 'Tag to push as the latest image'
+ default: ''
+ type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -266,6 +270,23 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
+ - name: Latest tag
+ # run this only when a release tag is pushed and a latest-image is defined
+ if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
+ run: |
+ docker pull localai/localai:${{ steps.meta.outputs.version }}
+ docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
+ docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+ docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
+ - name: Latest AIO tag
+ # run this only when a release tag is pushed and a latest-image is defined
+ if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
+ run: |
+ docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
+ docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image }}
+ docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
+ docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
+
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
From 93cfec3c326f98d9126dc0c835723a7e2ec5148d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 3 Apr 2024 11:30:12 +0200
Subject: [PATCH 0086/2750] ci: correctly tag latest and aio images
---
.github/workflows/image.yml | 20 ++++++++++++++------
.github/workflows/image_build.yml | 10 +++++++---
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 0c708b1d..d2607579 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -29,6 +29,7 @@ jobs:
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
+ latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -93,7 +94,8 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
- latest-image: 'latest-aio-gpu-nvidia-cuda-11'
+ latest-image: 'latest-gpu-nvidia-cuda-11'
+ latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
@@ -106,7 +108,8 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12"
- latest-image: 'latest-aio-gpu-nvidia-cuda-12'
+ latest-image: 'latest-gpu-nvidia-cuda-12'
+ latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
@@ -126,7 +129,8 @@ jobs:
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
- latest-image: 'latest-aio-gpu-hipblas'
+ latest-image: 'latest-gpu-hipblas'
+ latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
@@ -147,7 +151,8 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16"
- latest-image: 'latest-aio-gpu-intel-f16'
+ latest-image: 'latest-gpu-intel-f16'
+ latest-image-aio: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
@@ -158,7 +163,8 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
- latest-image: 'latest-aio-gpu-intel-f32'
+ latest-image: 'latest-gpu-intel-f32'
+ latest-image-aio: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
@@ -232,6 +238,7 @@ jobs:
base-image: ${{ matrix.base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
+ latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -249,7 +256,8 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
- latest-image: 'latest-aio-cpu'
+ latest-image: 'latest-cpu'
+ latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=5 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index affa03bf..cba78933 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -33,6 +33,10 @@ on:
description: 'Tag latest'
default: ''
type: string
+ latest-image-aio:
+ description: 'Tag to push as the latest AIO image'
+ default: ''
+ type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -280,12 +284,12 @@ jobs:
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this only when a release tag is pushed and a latest-image is defined
- if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
+ if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
- docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image }}
+ docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
- docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
+ docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary
run: |
From ff77d3bc22754ebac0aa624ed2f32ab355e8a310 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 3 Apr 2024 22:25:47 +0200
Subject: [PATCH 0087/2750] fix(seed): generate random seed per-request if -1
is set (#1952)
* fix(seed): generate random seed per-request if -1 is set
Also update CI with new workflows and allow the AIO tests to run with an
API key
Signed-off-by: Ettore Di Giacinto
* docs(openvino): Add OpenVINO example
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
.github/labeler.yml | 19 +++++++++++
.github/workflows/labeler.yml | 12 +++++++
.github/workflows/secscan.yaml | 27 ++++++++++++++++
core/backend/options.go | 15 +++++++--
core/config/backend_config.go | 7 ++--
docs/content/docs/features/text-generation.md | 32 +++++++++++++++++++
tests/e2e-aio/e2e_suite_test.go | 5 +--
7 files changed, 110 insertions(+), 7 deletions(-)
create mode 100644 .github/labeler.yml
create mode 100644 .github/workflows/labeler.yml
create mode 100644 .github/workflows/secscan.yaml
diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 00000000..64a88f43
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,19 @@
+enhancements:
+ - head-branch: ['^feature', 'feature']
+
+kind/documentation:
+- any:
+ - changed-files:
+ - any-glob-to-any-file: 'docs/*'
+ - changed-files:
+ - any-glob-to-any-file: '*.md'
+
+examples:
+- any:
+ - changed-files:
+ - any-glob-to-any-file: 'examples/*'
+
+ci:
+- any:
+ - changed-files:
+ - any-glob-to-any-file: '.github/*'
\ No newline at end of file
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 00000000..e3ecf923
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,12 @@
+name: "Pull Request Labeler"
+on:
+- pull_request_target
+
+jobs:
+ labeler:
+ permissions:
+ contents: read
+ pull-requests: write
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/labeler@v5
\ No newline at end of file
diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
new file mode 100644
index 00000000..a5221b40
--- /dev/null
+++ b/.github/workflows/secscan.yaml
@@ -0,0 +1,27 @@
+name: "Security Scan"
+
+# Run workflow each time code is pushed to your repository and on a schedule.
+# The scheduled workflow runs every Sunday at 00:00 UTC.
+on:
+ push:
+ schedule:
+ - cron: '0 0 * * 0'
+
+jobs:
+ tests:
+ runs-on: ubuntu-latest
+ env:
+ GO111MODULE: on
+ steps:
+ - name: Checkout Source
+ uses: actions/checkout@v3
+ - name: Run Gosec Security Scanner
+ uses: securego/gosec@master
+ with:
+ # we let the report content trigger a failure using the GitHub Security features.
+ args: '-no-fail -fmt sarif -out results.sarif ./...'
+ - name: Upload SARIF file
+ uses: github/codeql-action/upload-sarif@v2
+ with:
+ # Path to SARIF file relative to the root of the repository
+ sarif_file: results.sarif
\ No newline at end of file
diff --git a/core/backend/options.go b/core/backend/options.go
index bc7fa5a4..143a9332 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -1,6 +1,7 @@
package backend
import (
+ "math/rand"
"os"
"path/filepath"
@@ -33,12 +34,20 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode
return opts
}
+func getSeed(c config.BackendConfig) int32 {
+ seed := int32(*c.Seed)
+ if seed == config.RAND_SEED {
+ seed = rand.Int31()
+ }
+
+ return seed
+}
+
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}
-
return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
@@ -54,7 +63,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(*c.ContextSize),
- Seed: int32(*c.Seed),
+ Seed: getSeed(c),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
DraftModel: c.DraftModel,
@@ -129,7 +138,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
NKeep: int32(c.Keep),
Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS,
- Seed: int32(*c.Seed),
+ Seed: getSeed(c),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: *c.MMlock,
MMap: *c.MMap,
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 9b227578..25edd343 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"io/fs"
- "math/rand"
"os"
"path/filepath"
"sort"
@@ -20,6 +19,10 @@ import (
"github.com/charmbracelet/glamour"
)
+const (
+ RAND_SEED = -1
+)
+
type BackendConfig struct {
schema.PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`
@@ -218,7 +221,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
if cfg.Seed == nil {
// random number generator seed
- defaultSeed := int(rand.Int31())
+ defaultSeed := RAND_SEED
cfg.Seed = &defaultSeed
}
diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md
index 1d0e1e9e..c11894e7 100644
--- a/docs/content/docs/features/text-generation.md
+++ b/docs/content/docs/features/text-generation.md
@@ -304,6 +304,7 @@ The backend will automatically download the required files in order to run the m
| Type | Description |
| --- | --- |
| `AutoModelForCausalLM` | `AutoModelForCausalLM` is a model that can be used to generate sequences. |
+| `OVModelForCausalLM` | for OpenVINO models |
| N/A | Defaults to `AutoModel` |
@@ -324,4 +325,35 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
"prompt": "Hello, my name is",
"temperature": 0.1, "top_p": 0.1
}'
+```
+
+#### Examples
+
+##### OpenVINO
+
+A model configuration file for OpenVINO and the Starling model:
+
+```yaml
+name: starling-openvino
+backend: transformers
+parameters:
+ model: fakezeta/Starling-LM-7B-beta-openvino-int8
+context_size: 8192
+threads: 6
+f16: true
+type: OVModelForCausalLM
+stopwords:
+- <|end_of_turn|>
+- <|endoftext|>
+prompt_cache_path: "cache"
+prompt_cache_all: true
+template:
+ chat_message: |
+ {{if eq .RoleName "system"}}{{.Content}}<|end_of_turn|>{{end}}{{if eq .RoleName "assistant"}}<|end_of_turn|>GPT4 Correct Assistant: {{.Content}}<|end_of_turn|>{{end}}{{if eq .RoleName "user"}}GPT4 Correct User: {{.Content}}{{end}}
+
+ chat: |
+ {{.Input}}<|end_of_turn|>GPT4 Correct Assistant:
+
+ completion: |
+ {{.Input}}
```
\ No newline at end of file
diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go
index fa61c408..0aa68230 100644
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -23,6 +23,7 @@ var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
var apiPort = os.Getenv("LOCALAI_API_PORT")
var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT")
+var apiKey = os.Getenv("LOCALAI_API_KEY")
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
@@ -38,11 +39,11 @@ var _ = BeforeSuite(func() {
var defaultConfig openai.ClientConfig
if apiEndpoint == "" {
startDockerImage()
- defaultConfig = openai.DefaultConfig("")
+ defaultConfig = openai.DefaultConfig(apiKey)
defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
} else {
fmt.Println("Default ", apiEndpoint)
- defaultConfig = openai.DefaultConfig("")
+ defaultConfig = openai.DefaultConfig(apiKey)
defaultConfig.BaseURL = apiEndpoint
}
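Before this patch the seed was drawn once in SetDefaults, so every request against the same config shared it. The sentinel RAND_SEED (-1) defers the draw to request time; a runnable reduction of the getSeed helper (surrounding types simplified):

```go
package main

import (
	"fmt"
	"math/rand"
)

const RAND_SEED = -1 // sentinel: "pick a fresh seed for every request"

func getSeed(configured *int) int32 {
	seed := int32(*configured)
	if seed == RAND_SEED {
		seed = rand.Int31() // resolved per call, not once at config load
	}
	return seed
}

func main() {
	pinned, random := 42, RAND_SEED
	fmt.Println(getSeed(&pinned)) // always 42
	fmt.Println(getSeed(&random)) // differs between calls
	fmt.Println(getSeed(&random))
}
```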
From 3851b51d98ee6dce4e05aa6b045e53917b39f267 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 4 Apr 2024 00:27:57 +0200
Subject: [PATCH 0088/2750] :arrow_up: Update ggerganov/llama.cpp (#1953)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index a98eac67..019078a3 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=f87f7b898651339fe173ddf016ca826163e899d8
+CPPLLAMA_VERSION?=60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From b85dad0286304993b4fd32f22006d30c6c2fd337 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 4 Apr 2024 02:24:22 -0500
Subject: [PATCH 0089/2750] feat: first pass at improving logging (#1956)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.vscode/extensions.json | 5 +++++
core/config/application_config.go | 2 +-
core/http/endpoints/openai/chat.go | 10 +++++-----
core/http/endpoints/openai/files_test.go | 5 +++--
core/services/backend_monitor.go | 10 +++++-----
core/startup/config_file_watcher.go | 2 +-
core/startup/startup.go | 6 +++---
main.go | 10 +++++-----
pkg/gallery/models.go | 2 +-
pkg/model/initializers.go | 6 +++---
pkg/model/watchdog.go | 8 ++++----
pkg/startup/model_preload.go | 19 +++++++++++--------
pkg/utils/config.go | 11 ++++++-----
13 files changed, 53 insertions(+), 43 deletions(-)
create mode 100644 .vscode/extensions.json
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 00000000..7203cb3f
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+ "recommendations": [
+ "golang.go"
+ ]
+}
\ No newline at end of file
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 49b35f97..9525553a 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -169,7 +169,7 @@ func WithStringGalleries(galls string) AppOption {
}
var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
- log.Error().Msgf("failed loading galleries: %s", err.Error())
+ log.Error().Err(err).Msg("failed loading galleries")
}
o.Galleries = append(o.Galleries, galleries...)
}
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 837b6e12..871ae6c1 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -84,7 +84,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
if err != nil {
- log.Error().Msgf("error handling question: %s", err.Error())
+ log.Error().Err(err).Msg("error handling question")
return
}
@@ -268,7 +268,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
- log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err)
+ log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
@@ -455,7 +455,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
if err != nil {
- log.Error().Msgf("error handling question: %s", err.Error())
+ log.Error().Err(err).Msg("error handling question")
return
}
*c = append(*c, schema.Choice{
@@ -565,13 +565,13 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil)
if err != nil {
- log.Error().Msgf("inference error: %s", err.Error())
+ log.Error().Err(err).Msg("model inference failed")
return "", err
}
prediction, err := predFunc()
if err != nil {
- log.Error().Msgf("inference error: %s", err.Error())
+ log.Error().Err(err).Msg("prediction failed")
return "", err
}
return backend.Finetune(*config, prompt, prediction.Response), nil
diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go
index e1c1011e..fc77ae45 100644
--- a/core/http/endpoints/openai/files_test.go
+++ b/core/http/endpoints/openai/files_test.go
@@ -3,7 +3,6 @@ package openai
import (
"encoding/json"
"fmt"
- "github.com/rs/zerolog/log"
"io"
"mime/multipart"
"net/http"
@@ -12,6 +11,8 @@ import (
"path/filepath"
"strings"
+ "github.com/rs/zerolog/log"
+
"github.com/go-skynet/LocalAI/core/config"
utils2 "github.com/go-skynet/LocalAI/pkg/utils"
@@ -297,7 +298,7 @@ func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
if err != nil {
- log.Error().Msgf("Failed to decode response: %s", err)
+ log.Error().Err(err).Msg("failed to decode response")
}
return listFiles
diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go
index 88176753..979a67a3 100644
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -63,7 +63,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
pid, err := bm.modelLoader.GetGRPCPID(backend)
if err != nil {
- log.Error().Msgf("model %s : failed to find pid %+v", model, err)
+ log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
return nil, err
}
@@ -71,26 +71,26 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
backendProcess, err := gopsutil.NewProcess(int32(pid))
if err != nil {
- log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
+ log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting process info")
return nil, err
}
memInfo, err := backendProcess.MemoryInfo()
if err != nil {
- log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
+ log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting memory info")
return nil, err
}
memPercent, err := backendProcess.MemoryPercent()
if err != nil {
- log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
+ log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting memory percent")
return nil, err
}
cpuPercent, err := backendProcess.CPUPercent()
if err != nil {
- log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
+ log.Error().Err(err).Str("model", model).Int("pid", pid).Msg("error getting cpu percent")
return nil, err
}
diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go
index 0c7eff2d..9c758e25 100644
--- a/core/startup/config_file_watcher.go
+++ b/core/startup/config_file_watcher.go
@@ -85,7 +85,7 @@ func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig)
if !ok {
return
}
- log.Error().Msgf("WatchConfigDirectory goroutine error: %+v", err)
+ log.Error().Err(err).Msg("error encountered while watching config directory")
}
}
}()
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 828eb7a7..6298f034 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -61,17 +61,17 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
configLoaderOpts := options.ToConfigLoaderOptions()
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
- log.Error().Msgf("error loading config files: %s", err.Error())
+ log.Error().Err(err).Msg("error loading config files")
}
if options.ConfigFile != "" {
if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
- log.Error().Msgf("error loading config file: %s", err.Error())
+ log.Error().Err(err).Msg("error loading config file")
}
}
if err := cl.Preload(options.ModelPath); err != nil {
- log.Error().Msgf("error downloading models: %s", err.Error())
+ log.Error().Err(err).Msg("error downloading models")
}
if options.PreloadJSONModels != "" {
diff --git a/main.go b/main.go
index 0d8befcb..53966ba5 100644
--- a/main.go
+++ b/main.go
@@ -45,7 +45,7 @@ func main() {
path, err := os.Getwd()
if err != nil {
- log.Error().Msgf("error: %s", err.Error())
+ log.Error().Err(err).Msg("failed to get current directory")
os.Exit(1)
}
@@ -340,7 +340,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
appHTTP, err := http.App(cl, ml, options)
if err != nil {
- log.Error().Msg("Error during HTTP App constructor")
+ log.Error().Err(err).Msg("error during HTTP App construction")
return err
}
@@ -357,7 +357,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
Action: func(ctx *cli.Context) error {
var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
- log.Error().Msgf("unable to load galleries: %s", err.Error())
+ log.Error().Err(err).Msg("unable to load galleries")
}
models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path"))
@@ -382,7 +382,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
- log.Error().Msgf("unable to load galleries: %s", err.Error())
+ log.Error().Err(err).Msg("unable to load galleries")
}
progressBar := progressbar.NewOptions(
@@ -547,7 +547,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
err = app.Run(os.Args)
if err != nil {
- log.Error().Msgf("error: %s", err.Error())
+ log.Error().Err(err).Msg("application runtime error")
os.Exit(1)
}
}
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
index 65d0401f..10caedee 100644
--- a/pkg/gallery/models.go
+++ b/pkg/gallery/models.go
@@ -67,7 +67,7 @@ func GetGalleryConfigFromURL(url string) (Config, error) {
return yaml.Unmarshal(d, &config)
})
if err != nil {
- log.Error().Msgf("GetGalleryConfigFromURL error for url %s\n%s", url, err.Error())
+ log.Error().Err(err).Str("url", url).Msg("failed to get gallery config for url")
return config, err
}
return config, nil
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 85744f9a..5d9808a4 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -15,8 +15,8 @@ import (
)
var Aliases map[string]string = map[string]string{
- "go-llama": LLamaCPP,
- "llama": LLamaCPP,
+ "go-llama": LLamaCPP,
+ "llama": LLamaCPP,
"embedded-store": LocalStoreBackend,
}
@@ -127,7 +127,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
break
}
if err != nil && i == o.grpcAttempts-1 {
- log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error())
+ log.Error().Err(err).Msg("failed starting/connecting to the gRPC service")
}
time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second)
}
diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go
index c93cb99a..b5381832 100644
--- a/pkg/model/watchdog.go
+++ b/pkg/model/watchdog.go
@@ -110,10 +110,10 @@ func (wd *WatchDog) checkIdle() {
log.Debug().Msgf("[WatchDog] %s: idle connection", address)
if time.Since(t) > wd.idletimeout {
log.Warn().Msgf("[WatchDog] Address %s is idle for too long, killing it", address)
- p, ok := wd.addressModelMap[address]
+ model, ok := wd.addressModelMap[address]
if ok {
- if err := wd.pm.ShutdownModel(p); err != nil {
- log.Error().Msgf("[watchdog] Error shutting down model %s: %v", p, err)
+ if err := wd.pm.ShutdownModel(model); err != nil {
+ log.Error().Err(err).Str("model", model).Msg("[watchdog] error shutting down model")
}
log.Debug().Msgf("[WatchDog] model shut down: %s", address)
delete(wd.idleTime, address)
@@ -141,7 +141,7 @@ func (wd *WatchDog) checkBusy() {
if ok {
log.Warn().Msgf("[WatchDog] Model %s is busy for too long, killing it", model)
if err := wd.pm.ShutdownModel(model); err != nil {
- log.Error().Msgf("[watchdog] Error shutting down model %s: %v", model, err)
+ log.Error().Err(err).Str("model", model).Msg("[watchdog] error shutting down model")
}
log.Debug().Msgf("[WatchDog] model shut down: %s", address)
delete(wd.timetable, address)
diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go
index 979b4d83..b09516a7 100644
--- a/pkg/startup/model_preload.go
+++ b/pkg/startup/model_preload.go
@@ -35,14 +35,15 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
modelYAML, err := embedded.ResolveContent(url)
// If we resolve something, just save it to disk and continue
if err != nil {
- log.Error().Msgf("error loading model: %s", err.Error())
+ log.Error().Err(err).Msg("error resolving model content")
continue
}
log.Debug().Msgf("[startup] resolved embedded model: %s", url)
md5Name := utils.MD5(url)
- if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
- log.Error().Msgf("error loading model: %s", err.Error())
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
}
case downloader.LooksLikeURL(url):
log.Debug().Msgf("[startup] resolved model to download: %s", url)
@@ -52,11 +53,12 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if err != nil {
- log.Error().Msgf("error loading model: %s", err.Error())
+ log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
}
}
default:
@@ -67,12 +69,13 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
modelYAML, err := os.ReadFile(url)
if err != nil {
- log.Error().Msgf("error loading model: %s", err.Error())
+ log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
continue
}
- if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
- log.Error().Msgf("error loading model: %s", err.Error())
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
}
} else {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
diff --git a/pkg/utils/config.go b/pkg/utils/config.go
index a9167ed3..929e1f9f 100644
--- a/pkg/utils/config.go
+++ b/pkg/utils/config.go
@@ -2,21 +2,22 @@ package utils
import (
"encoding/json"
- "github.com/rs/zerolog/log"
"os"
"path/filepath"
+
+ "github.com/rs/zerolog/log"
)
func SaveConfig(filePath, fileName string, obj any) {
file, err := json.MarshalIndent(obj, "", " ")
if err != nil {
- log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err)
+ log.Error().Err(err).Msg("failed to JSON marshal the uploadedFiles")
}
absolutePath := filepath.Join(filePath, fileName)
err = os.WriteFile(absolutePath, file, 0644)
if err != nil {
- log.Error().Msgf("Failed to save configuration file to %s: %s", absolutePath, err)
+ log.Error().Err(err).Str("filepath", absolutePath).Msg("failed to save configuration file")
}
}
@@ -31,11 +32,11 @@ func LoadConfig(filePath, fileName string, obj interface{}) {
file, err := os.ReadFile(uploadFilePath)
if err != nil {
- log.Error().Msgf("Failed to read file: %s", err)
+ log.Error().Err(err).Str("filepath", uploadFilePath).Msg("failed to read file")
} else {
err = json.Unmarshal(file, &obj)
if err != nil {
- log.Error().Msgf("Failed to JSON unmarshal the file %s: %v", uploadFilePath, err)
+ log.Error().Err(err).Str("filepath", uploadFilePath).Msg("failed to parse file as JSON")
}
}
}
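The sweep converts printf-style Msgf calls into structured zerolog events: the error and its context become typed fields (Err, Str, Int) that log processors can filter on, instead of being baked into one message string. A before-and-after sketch:

```go
package main

import (
	"errors"

	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
)

func main() {
	zerolog.SetGlobalLevel(zerolog.DebugLevel)
	err := errors.New("connection refused")

	// Before: everything flattened into one opaque string.
	log.Error().Msgf("model %s [PID %d] : error getting process info %+v", "gpt-4", 1234, err)

	// After: error and context travel as separate, queryable fields.
	log.Error().
		Err(err).
		Str("model", "gpt-4").
		Int("pid", 1234).
		Msg("error getting process info")
}
```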
From f744e1f9317cf66f8812109d1601b0db07dd9883 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 5 Apr 2024 08:41:35 +0200
Subject: [PATCH 0090/2750] :arrow_up: Update ggerganov/whisper.cpp (#1958)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 019078a3..dab1a0cb 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=ac283dbce7d42735e3ed985329037bf23fe180aa
+WHISPER_CPP_VERSION?=1d7657f40974e251ea42275e155a8abfb24228ef
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From b2d9e3f7044a3c4853274609012e96670ce45bec Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 5 Apr 2024 08:41:55 +0200
Subject: [PATCH 0091/2750] :arrow_up: Update ggerganov/llama.cpp (#1959)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index dab1a0cb..08e2ef3e 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640
+CPPLLAMA_VERSION?=a307375c02cac45cff53cf2520330b43fecc7718
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 8aa5f5a660987553452fb6b160281b5c573e579f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 6 Apr 2024 21:15:25 +0200
Subject: [PATCH 0092/2750] :arrow_up: Update ggerganov/llama.cpp (#1960)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 08e2ef3e..938ee989 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a307375c02cac45cff53cf2520330b43fecc7718
+CPPLLAMA_VERSION?=a8bd14d55717754a1f48313a846a2b16fa998ad2
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 83425532144fbc4fbc9aa734aab8f822a0a2ddf4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 6 Apr 2024 22:56:45 +0200
Subject: [PATCH 0093/2750] fix(llama.cpp): set better defaults for llama.cpp
(#1961)
fix(defaults): set better defaults for llama.cpp
Signed-off-by: Ettore Di Giacinto
---
core/backend/options.go | 4 ++--
core/config/backend_config.go | 15 +++++++++++++--
core/http/endpoints/openai/request.go | 6 +++---
core/schema/prediction.go | 10 +++++-----
4 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/core/backend/options.go b/core/backend/options.go
index 143a9332..5b303b05 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
MMap: *c.MMap,
MainGPU: c.MainGPU,
TensorSplit: c.TensorSplit,
- TailFreeSamplingZ: float32(c.TFZ),
- TypicalP: float32(c.TypicalP),
+ TailFreeSamplingZ: float32(*c.TFZ),
+ TypicalP: float32(*c.TypicalP),
}
}
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 25edd343..a90b1c1b 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
threads := lo.threads
f16 := lo.f16
debug := lo.debug
- defaultTopP := 0.7
- defaultTopK := 80
+ // https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
+ defaultTopP := 0.95
+ defaultTopK := 40
defaultTemp := 0.9
defaultMaxTokens := 2048
defaultMirostat := 2
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
+ defaultTypicalP := 1.0
+ defaultTFZ := 1.0
// Try to offload all GPU layers (if GPU is found)
defaultNGPULayers := 99999999
@@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.TopK = &defaultTopK
}
+ if cfg.TypicalP == nil {
+ cfg.TypicalP = &defaultTypicalP
+ }
+
+ if cfg.TFZ == nil {
+ cfg.TFZ = &defaultTFZ
+ }
+
if cfg.MMap == nil {
// MMap is enabled by default
cfg.MMap = &trueV
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index c9981204..369fb0b8 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -192,11 +192,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
config.RepeatPenalty = input.RepeatPenalty
}
- if input.FrequencyPenalty!= 0 {
+ if input.FrequencyPenalty != 0 {
config.FrequencyPenalty = input.FrequencyPenalty
}
- if input.PresencePenalty!= 0 {
+ if input.PresencePenalty != 0 {
config.PresencePenalty = input.PresencePenalty
}
@@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
config.Seed = input.Seed
}
- if input.TypicalP != 0 {
+ if input.TypicalP != nil {
config.TypicalP = input.TypicalP
}
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index 4933f2d2..7e509167 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -24,12 +24,12 @@ type PredictionOptions struct {
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Keep int `json:"n_keep" yaml:"n_keep"`
- FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
- PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
- TFZ float64 `json:"tfz" yaml:"tfz"`
+ FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
+ PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
+ TFZ *float64 `json:"tfz" yaml:"tfz"`
- TypicalP float64 `json:"typical_p" yaml:"typical_p"`
- Seed *int `json:"seed" yaml:"seed"`
+ TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
+ Seed *int `json:"seed" yaml:"seed"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
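The move from value fields to pointer fields is what lets `SetDefaults` distinguish "option omitted" from "option explicitly set to 0". A minimal self-contained Go sketch of the same pattern (hypothetical names, not the project's actual types):

```go
package main

import "fmt"

// Options mirrors the patch: pointer fields let nil mean "not provided",
// so an explicit zero from the request is no longer confused with "unset".
type Options struct {
	TypicalP *float64 `json:"typical_p"`
	TFZ      *float64 `json:"tfz"`
}

// setDefaults fills only the fields the caller left unset, using the
// llama.cpp sampler defaults referenced in the patch.
func setDefaults(o *Options) {
	defaultTypicalP, defaultTFZ := 1.0, 1.0
	if o.TypicalP == nil {
		o.TypicalP = &defaultTypicalP
	}
	if o.TFZ == nil {
		o.TFZ = &defaultTFZ
	}
}

func main() {
	zero := 0.0
	explicit := Options{TypicalP: &zero} // request sent typical_p: 0
	omitted := Options{}                 // request sent nothing

	setDefaults(&explicit)
	setDefaults(&omitted)
	fmt.Println(*explicit.TypicalP, *omitted.TypicalP) // 0 1
}
```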
From ed1378298677ebdc53f2c5e930941ca8e92e90ae Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 7 Apr 2024 10:32:10 +0200
Subject: [PATCH 0094/2750] :arrow_up: Update ggerganov/llama.cpp (#1964)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 938ee989..290a4adc 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a8bd14d55717754a1f48313a846a2b16fa998ad2
+CPPLLAMA_VERSION?=54ea0698fbf87e36a5d68a98c95f6bdd0fb91557
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 74492a81c70603547a717e45d00d532cd2017244 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 7 Apr 2024 11:06:35 +0200
Subject: [PATCH 0095/2750] doc(quickstart): fix typo
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/getting-started/quickstart.md | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index c56dced5..ff1dc6a7 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -61,10 +61,14 @@ Or with a docker-compose file:
version: "3.9"
services:
api:
- image: localai/localai:{{< version >}}-aio-cpu
+ image: localai/localai:latest-aio-cpu
+ # For a specific version:
+ # image: localai/localai:{{< version >}}-aio-cpu
# For Nvidia GPUs decomment one of the following (cuda11 or cuda12):
- # image: localai/localai:{{< version >}}-aio-gpu-cuda-11
- # image: localai/localai:{{< version >}}-aio-gpu-cuda-12
+ # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11
+ # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12
+ # image: localai/localai:latest-aio-gpu-nvidia-cuda-11
+ # image: localai/localai:latest-aio-gpu-nvidia-cuda-12
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
From f36d86ba6db1507814ae11c169a073d1a84d3b4f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 7 Apr 2024 18:23:47 +0200
Subject: [PATCH 0096/2750] fix(hermes-2-pro-mistral): correct dashes in
template to suppress newlines (#1966)
Signed-off-by: Ettore Di Giacinto
---
aio/cpu/text-to-text.yaml | 21 ++++++++++++---------
aio/gpu-8g/text-to-text.yaml | 21 ++++++++++++---------
aio/intel/text-to-text.yaml | 21 ++++++++++++---------
embedded/models/hermes-2-pro-mistral.yaml | 21 ++++++++++++---------
4 files changed, 48 insertions(+), 36 deletions(-)
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 8a20109d..6c4ec9e6 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,12 +6,14 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{ if .FunctionCall }}<tool_call>{{end}}
- {{ if eq .RoleName "tool" }}<tool_response>{{end}}
- {{if .Content}}{{.Content}}{{end}}
- {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if .FunctionCall }}</tool_call>{{end}}
- {{ if eq .RoleName "tool" }}</tool_response>{{end}}
+ {{- if .FunctionCall }}<tool_call>{{end}}
+ {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+ {{- if .Content}}
+ {{.Content}}
+ {{- end }}
+ {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
+ {{- if .FunctionCall }}</tool_call>{{end }}
+ {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
@@ -27,12 +29,13 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>{'arguments': <args-dict>, 'name': <function-name>}</tool_call>
- <|im_end|>
- {{.Input}}
+
+ <|im_end|>
+ {{.Input -}}
<|im_start|>assistant
chat: |
- {{.Input}}
+ {{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 9502cdfe..8d5c84f7 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,12 +6,14 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{ if .FunctionCall }}<tool_call>{{end}}
- {{ if eq .RoleName "tool" }}<tool_response>{{end}}
- {{if .Content}}{{.Content}}{{end}}
- {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if .FunctionCall }}</tool_call>{{end}}
- {{ if eq .RoleName "tool" }}</tool_response>{{end}}
+ {{- if .FunctionCall }}<tool_call>{{end}}
+ {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+ {{- if .Content}}
+ {{.Content}}
+ {{- end }}
+ {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
+ {{- if .FunctionCall }}</tool_call>{{end }}
+ {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
@@ -27,12 +29,13 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>{'arguments': <args-dict>, 'name': <function-name>}</tool_call>
- <|im_end|>
- {{.Input}}
+
+ <|im_end|>
+ {{.Input -}}
<|im_start|>assistant
chat: |
- {{.Input}}
+ {{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index d2316745..a7cb5b4d 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,12 +7,14 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{ if .FunctionCall }}<tool_call>{{end}}
- {{ if eq .RoleName "tool" }}<tool_response>{{end}}
- {{if .Content}}{{.Content}}{{end}}
- {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if .FunctionCall }}</tool_call>{{end}}
- {{ if eq .RoleName "tool" }}</tool_response>{{end}}
+ {{- if .FunctionCall }}<tool_call>{{end}}
+ {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+ {{- if .Content}}
+ {{.Content}}
+ {{- end }}
+ {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
+ {{- if .FunctionCall }}</tool_call>{{end }}
+ {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
@@ -28,12 +30,13 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>{'arguments': <args-dict>, 'name': <function-name>}</tool_call>
- <|im_end|>
- {{.Input}}
+
+ <|im_end|>
+ {{.Input -}}
<|im_start|>assistant
chat: |
- {{.Input}}
+ {{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index eb75b97c..7bfa9418 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,12 +6,14 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{ if .FunctionCall }}<tool_call>{{end}}
- {{ if eq .RoleName "tool" }}<tool_response>{{end}}
- {{if .Content}}{{.Content}}{{end}}
- {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
- {{ if .FunctionCall }}</tool_call>{{end}}
- {{ if eq .RoleName "tool" }}</tool_response>{{end}}
+ {{- if .FunctionCall }}<tool_call>{{end}}
+ {{- if eq .RoleName "tool" }}<tool_response>{{end }}
+ {{- if .Content}}
+ {{.Content}}
+ {{- end }}
+ {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
+ {{- if .FunctionCall }}</tool_call>{{end }}
+ {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
@@ -27,12 +29,13 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>{'arguments': <args-dict>, 'name': <function-name>}</tool_call>
- <|im_end|>
- {{.Input}}
+
+ <|im_end|>
+ {{.Input -}}
<|im_start|>assistant
chat: |
- {{.Input}}
+ {{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
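The substance of this patch is Go `text/template` whitespace control: a leading `{{-` trims the whitespace (including the newline) to the left of the action, so branches that render nothing no longer leave blank lines in the prompt. A small runnable sketch of the effect:

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// "{{-" trims all whitespace (including the newline) preceding the
	// action, so an empty branch leaves no stray blank line behind.
	const tmpl = "<|im_start|>user\n" +
		"{{- if .FunctionCall }}<tool_call>{{end}}\n" +
		"{{- if .Content}}\n{{.Content}}\n{{- end }}\n" +
		"<|im_end|>\n"

	t := template.Must(template.New("chat").Parse(tmpl))
	data := map[string]any{"Content": "hello", "FunctionCall": nil}
	_ = t.Execute(os.Stdout, data)
	// Prints exactly:
	// <|im_start|>user
	// hello
	// <|im_end|>
}
```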
From a153b628c2444fa24cb901ef27f3fe0cb3fcde17 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 8 Apr 2024 08:38:17 +0200
Subject: [PATCH 0097/2750] :arrow_up: Update ggerganov/whisper.cpp (#1969)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 290a4adc..d11a47bc 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=1d7657f40974e251ea42275e155a8abfb24228ef
+WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From efcca15d3f9db3da6ceecf1c49224674dcd8f13f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 8 Apr 2024 08:38:47 +0200
Subject: [PATCH 0098/2750] :arrow_up: Update ggerganov/llama.cpp (#1970)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d11a47bc..10791a4b 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=54ea0698fbf87e36a5d68a98c95f6bdd0fb91557
+CPPLLAMA_VERSION?=855f54402e866ed19d8d675b56a81c844c64b325
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From a38618db0278b9460688e4f6275009d3c601a2bc Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Mon, 8 Apr 2024 22:33:51 +0200
Subject: [PATCH 0099/2750] fix regression #1971 (#1972)
fixes regression #1971 introduced by intel_extension_for_transformers==1.4
---
backend/python/transformers/transformers_server.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 04324d9b..c7f1cd75 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -22,11 +22,7 @@ import torch.cuda
XPU=os.environ.get("XPU", "0") == "1"
if XPU:
- import intel_extension_for_pytorch as ipex
- from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer
- from optimum.intel.openvino import OVModelForCausalLM
- from openvino.runtime import Core
else:
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer
@@ -115,6 +111,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
try:
if request.Type == "AutoModelForCausalLM":
if XPU:
+ import intel_extension_for_pytorch as ipex
+ from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
+
device_map="xpu"
compute=torch.float16
if request.Quantization == "xpu_4bit":
@@ -141,6 +140,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
device_map=device_map,
torch_dtype=compute)
elif request.Type == "OVModelForCausalLM":
+ from optimum.intel.openvino import OVModelForCausalLM
+ from openvino.runtime import Core
+
if "GPU" in Core().available_devices:
device_map="GPU"
else:
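The fix works by moving the Intel-specific Python imports into the branches that actually use them, so merely starting the server no longer evaluates the broken module. Go has no runtime imports, but the analogous idea, paying for an optional dependency only on first use, is commonly expressed with `sync.Once`; a loose sketch of that analogue, not the committed code:

```go
package main

import (
	"fmt"
	"sync"
)

// xpuBackend defers its expensive (and possibly failing) setup until an
// XPU model is actually requested, instead of at process start.
type xpuBackend struct {
	once sync.Once
	err  error
}

func (b *xpuBackend) ensureLoaded() error {
	b.once.Do(func() {
		// Hypothetical heavy initialization standing in for the
		// intel_extension_for_* imports this patch relocates.
		fmt.Println("loading Intel XPU extensions...")
	})
	return b.err
}

func main() {
	var b xpuBackend
	if err := b.ensureLoaded(); err != nil {
		fmt.Println("XPU unavailable:", err)
		return
	}
	fmt.Println("ready")
}
```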
From 195be10050d552d83e3f0729b367d0359edf60d9 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 8 Apr 2024 23:26:52 +0200
Subject: [PATCH 0100/2750] :arrow_up: Update ggerganov/llama.cpp (#1973)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 10791a4b..d42e1a99 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=855f54402e866ed19d8d675b56a81c844c64b325
+CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 2bbb221fb18cb119c384f86739c1433cec8a491b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 8 Apr 2024 21:28:59 +0000
Subject: [PATCH 0101/2750] tests(petals): temp disable
---
.github/workflows/test-extra.yml | 52 ++++++++++++++++----------------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 6f92c806..7689f06d 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -133,34 +133,34 @@ jobs:
- tests-petals:
- runs-on: ubuntu-latest
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install build-essential ffmpeg
- curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
- sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
- gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
- sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
- sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
- sudo apt-get update && \
- sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev
+ # tests-petals:
+ # runs-on: ubuntu-latest
+ # steps:
+ # - name: Clone
+ # uses: actions/checkout@v4
+ # with:
+ # submodules: true
+ # - name: Dependencies
+ # run: |
+ # sudo apt-get update
+ # sudo apt-get install build-essential ffmpeg
+ # curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+ # sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+ # gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+ # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+ # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+ # sudo apt-get update && \
+ # sudo apt-get install -y conda
+ # sudo apt-get install -y ca-certificates cmake curl patch
+ # sudo apt-get install -y libopencv-dev
- sudo rm -rfv /usr/bin/conda || true
+ # sudo rm -rfv /usr/bin/conda || true
- - name: Test petals
- run: |
- export PATH=$PATH:/opt/conda/bin
- make --jobs=5 --output-sync=target -C backend/python/petals
- make --jobs=5 --output-sync=target -C backend/python/petals test
+ # - name: Test petals
+ # run: |
+ # export PATH=$PATH:/opt/conda/bin
+ # make --jobs=5 --output-sync=target -C backend/python/petals
+ # make --jobs=5 --output-sync=target -C backend/python/petals test
From cc3d601836891fc4694745929f90204c684b4152 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Apr 2024 09:49:11 +0200
Subject: [PATCH 0102/2750] ci: fixup latest image push
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/image_build.yml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index cba78933..bd244dcf 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -282,6 +282,7 @@ jobs:
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
+ docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
@@ -290,7 +291,8 @@ jobs:
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-
+ docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
+
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
From aeb3f835aef7c80da7ad2ccae433d11449493061 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 10 Apr 2024 09:07:21 +0200
Subject: [PATCH 0103/2750] :arrow_up: Update docs version mudler/LocalAI
(#1978)
Signed-off-by: GitHub
Co-authored-by: mudler
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index b6372479..cc0478ca 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.11.0"
+ "version": "v2.12.1"
}
From 951e39d36c06bb14b3b95b27309d7be809f4a3f4 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 10 Apr 2024 09:07:41 +0200
Subject: [PATCH 0104/2750] :arrow_up: Update ggerganov/llama.cpp (#1979)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d42e1a99..b43541ff 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88
+CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 7e2f8bb4083eea3939072dab2cb47261b1b97603 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 10 Apr 2024 09:08:00 +0200
Subject: [PATCH 0105/2750] :arrow_up: Update ggerganov/whisper.cpp (#1980)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index b43541ff..337ebc64 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769
+WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From d692b2c32a400a4aa0c6df9a51aa4f3cbe73edff Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 10 Apr 2024 10:31:59 +0200
Subject: [PATCH 0106/2750] ci: push latest images for dockerhub (#1984)
Fixes: #1983
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/image_build.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index bd244dcf..b0684a4c 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -280,6 +280,7 @@ jobs:
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
+ docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
@@ -289,6 +290,7 @@ jobs:
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
+ docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
From d23e73b11828b59a608174dc679eb5a3c2d2f42f Mon Sep 17 00:00:00 2001
From: "Sebastian.W"
Date: Wed, 10 Apr 2024 18:36:10 +0800
Subject: [PATCH 0107/2750] fix(autogptq): do not use_triton with qwen-vl
(#1985)
* Enhance autogptq backend to support VL models
* update dependencies for autogptq
* remove redundant auto-gptq dependency
* Convert base64 to image_url for Qwen-VL model
* implemented model inference for qwen-vl
* remove user prompt from generated answer
* fixed write image error
* fixed use_triton issue when loading Qwen-VL model
---------
Co-authored-by: Binghua Wu
---
backend/python/autogptq/autogptq.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py
index bbafdd92..c7c35028 100755
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
@@ -39,7 +39,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.model_name = "Qwen-VL-Chat"
model = AutoModelForCausalLM.from_pretrained(model_path,
trust_remote_code=request.TrustRemoteCode,
- use_triton=request.UseTriton,
device_map="auto").eval()
else:
model = AutoGPTQForCausalLM.from_quantized(model_path,
From 36da11a0ee4cdf575012c669150d5f617362e619 Mon Sep 17 00:00:00 2001
From: Koen Farell
Date: Wed, 10 Apr 2024 14:25:26 +0300
Subject: [PATCH 0108/2750] deps: Update version of vLLM to add support for
the Cohere Command_R model in vLLM inference (#1975)
* Update vLLM version to add support of Command_R
Signed-off-by: Koen Farell
* fix: Fixed vllm version from requirements
Signed-off-by: Koen Farell
* chore: Update transformers-rocm.yml
Signed-off-by: Koen Farell
* chore: Update transformers.yml version of vllm
Signed-off-by: Koen Farell
---------
Signed-off-by: Koen Farell
---
backend/python/common-env/transformers/transformers-nvidia.yml | 2 +-
backend/python/common-env/transformers/transformers-rocm.yml | 2 +-
backend/python/common-env/transformers/transformers.yml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml
index e8d8155b..e12b5dbb 100644
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -116,7 +116,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- - vllm==0.3.2
+ - vllm>=0.4.0
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml
index fa245bf4..48fac8bf 100644
--- a/backend/python/common-env/transformers/transformers-rocm.yml
+++ b/backend/python/common-env/transformers/transformers-rocm.yml
@@ -104,7 +104,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- - vllm==0.3.2
+ - vllm>=0.4.0
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index 3b3b8fe7..843b13fa 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -108,7 +108,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- - vllm==0.3.2
+ - vllm>=0.4.0
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
From 93f51d80d41b3b3748da41ad4cb7baf8c762890c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 10 Apr 2024 16:29:46 +0200
Subject: [PATCH 0109/2750] Update gpt-vision.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/features/gpt-vision.md | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md
index 3afcab16..827e2c08 100644
--- a/docs/content/docs/features/gpt-vision.md
+++ b/docs/content/docs/features/gpt-vision.md
@@ -22,6 +22,17 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
```
+Grammars and function tools can also be used in conjunction with vision APIs:
+
+```bash
+ curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
+```
+
### Setup
+All-in-One images already ship the llava model as `gpt-4-vision-preview`, so in that case no setup is needed.
+
To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava).
+
From 636d487dc84c6f1d99ba7630d8851865091c42cb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 10 Apr 2024 16:30:03 +0200
Subject: [PATCH 0110/2750] Update gpt-vision.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/features/gpt-vision.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md
index 827e2c08..9e021273 100644
--- a/docs/content/docs/features/gpt-vision.md
+++ b/docs/content/docs/features/gpt-vision.md
@@ -25,7 +25,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
Grammars and function tools can also be used in conjunction with vision APIs:
```bash
- curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava", "grammar": "root ::= (\"yes\" | \"no\")",
"messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
```
From 92005b9c0285f31e7f29ca4f37e6afa194745cf0 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 10 Apr 2024 16:30:57 +0200
Subject: [PATCH 0111/2750] Update openai-functions.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/features/openai-functions.md | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md
index 843524f4..435101ee 100644
--- a/docs/content/docs/features/openai-functions.md
+++ b/docs/content/docs/features/openai-functions.md
@@ -144,6 +144,15 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
}'
```
+Grammars and function tools can also be used in conjunction with vision APIs:
+
+```bash
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")",
+ "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
+```
+
+
## 💡 Examples
A full e2e example with `docker-compose` is available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/functions).
From 24d7dadfed6ddf19e91652c3eb45d04ad1d15584 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 11 Apr 2024 02:19:24 -0500
Subject: [PATCH 0112/2750] feat: kong cli refactor fixes #1955 (#1974)
* feat: migrate to alecthomas/kong for CLI
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: bring in new flag for granular log levels
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* chore: go mod tidy
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: allow loading cli flag values from ["./localai.yaml", "~/.config/localai.yaml", "/etc/localai.yaml"] in that order
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: load from .env file instead of a yaml file
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: better loading for environment files
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat(doc): add initial documentation about configuration
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: remove test log lines
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: integrate new documentation into existing pages
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: add documentation on .env files
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: cleanup some documentation table errors
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: refactor CLI logic out to its own package under core/cli
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---------
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.env | 38 +-
core/cli/cli.go | 20 +
core/cli/models.go | 74 +++
core/cli/run.go | 155 +++++
core/cli/transcript.go | 54 ++
core/cli/tts.go | 61 ++
docs/content/docs/advanced/advanced-usage.md | 101 +++-
go.mod | 16 +-
go.sum | 61 +-
main.go | 595 +++----------------
10 files changed, 552 insertions(+), 623 deletions(-)
create mode 100644 core/cli/cli.go
create mode 100644 core/cli/models.go
create mode 100644 core/cli/run.go
create mode 100644 core/cli/transcript.go
create mode 100644 core/cli/tts.go
diff --git a/.env b/.env
index 82a64e3d..35d4f2d7 100644
--- a/.env
+++ b/.env
@@ -1,33 +1,33 @@
## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
-# THREADS=14
+# LOCALAI_THREADS=14
## Specify a different bind address (defaults to ":8080")
-# ADDRESS=127.0.0.1:8080
+# LOCALAI_ADDRESS=127.0.0.1:8080
## Default models context size
-# CONTEXT_SIZE=512
+# LOCALAI_CONTEXT_SIZE=512
#
## Define galleries.
## models to install will be visible in `/models/available`
-# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
+# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
## CORS settings
-# CORS=true
-# CORS_ALLOW_ORIGINS=*
+# LOCALAI_CORS=true
+# LOCALAI_CORS_ALLOW_ORIGINS=*
## Default path for models
#
-# MODELS_PATH=/models
+# LOCALAI_MODELS_PATH=/models
## Enable debug mode
-# DEBUG=true
+# LOCALAI_LOG_LEVEL=debug
## Disables COMPEL (Diffusers)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
-# SINGLE_ACTIVE_BACKEND=true
+# LOCALAI_SINGLE_ACTIVE_BACKEND=true
## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
@@ -46,13 +46,13 @@
# GO_TAGS=stablediffusion
## Path where to store generated images
-# IMAGE_PATH=/tmp
+# LOCALAI_IMAGE_PATH=/tmp/generated/images
## Specify a default upload limit in MB (whisper)
-# UPLOAD_LIMIT
+# LOCALAI_UPLOAD_LIMIT=15
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
-# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
+# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
### Advanced settings ###
### Those are not really used by LocalAI, but from components in the stack ###
@@ -72,18 +72,18 @@
# LLAMACPP_PARALLEL=1
### Enable to run parallel requests
-# PARALLEL_REQUESTS=true
+# LOCALAI_PARALLEL_REQUESTS=true
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
-# WATCHDOG_IDLE=true
-#
-# Enables watchdog to kill backends that are busy for too much time
-# WATCHDOG_BUSY=true
+# LOCALAI_WATCHDOG_IDLE=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
-# WATCHDOG_IDLE_TIMEOUT=5m
+# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
+#
+# Enables watchdog to kill backends that are busy for too much time
+# LOCALAI_WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# WATCHDOG_BUSY_TIMEOUT=5m
\ No newline at end of file
+# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
\ No newline at end of file
diff --git a/core/cli/cli.go b/core/cli/cli.go
new file mode 100644
index 00000000..5e757f64
--- /dev/null
+++ b/core/cli/cli.go
@@ -0,0 +1,20 @@
+package cli
+
+import "embed"
+
+type Context struct {
+ Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
+ LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
+
+ // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
+ BackendAssets embed.FS `kong:"-"`
+}
+
+var CLI struct {
+ Context `embed:""`
+
+ Run RunCMD `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
+ Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
+ TTS TTSCMD `cmd:"" help:"Convert text to speech"`
+ Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
+}
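For readers unfamiliar with `alecthomas/kong`, the struct tags above carry most of the behavior: `env` binds a flag to one or more environment variables, `default` supplies a fallback, `embed` flattens a struct into the parent command grammar, and `default:"withargs"` makes a subcommand the implicit one. A minimal sketch of the same conventions (hypothetical flags, not the full LocalAI CLI):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/kong"
)

// Globals is embedded into the CLI grammar, like Context in cli.go above.
type Globals struct {
	LogLevel string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" default:"info" help:"Set the level of logs to output [${enum}]"`
}

// ServeCmd stands in for RunCMD: flags resolve from CLI args first,
// then the listed environment variables, then the default value.
type ServeCmd struct {
	Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server"`
}

func (s *ServeCmd) Run(g *Globals) error {
	fmt.Printf("serving on %s (log level: %s)\n", s.Address, g.LogLevel)
	return nil
}

var cli struct {
	Globals `embed:""`

	// "withargs" makes `serve` the command used when none is named.
	Serve ServeCmd `cmd:"" default:"withargs" help:"Run the server"`
}

func main() {
	ctx := kong.Parse(&cli)
	// Bindings passed to Run are injected into command Run methods.
	ctx.FatalIfErrorf(ctx.Run(&cli.Globals))
}
```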
diff --git a/core/cli/models.go b/core/cli/models.go
new file mode 100644
index 00000000..62ef366b
--- /dev/null
+++ b/core/cli/models.go
@@ -0,0 +1,74 @@
+package cli
+
+import (
+ "encoding/json"
+ "fmt"
+
+ "github.com/go-skynet/LocalAI/pkg/gallery"
+ "github.com/rs/zerolog/log"
+ "github.com/schollz/progressbar/v3"
+)
+
+type ModelsCMDFlags struct {
+ Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+}
+
+type ModelsList struct {
+ ModelsCMDFlags `embed:""`
+}
+
+type ModelsInstall struct {
+ ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
+
+ ModelsCMDFlags `embed:""`
+}
+
+type ModelsCMD struct {
+ List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"`
+ Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
+}
+
+func (ml *ModelsList) Run(ctx *Context) error {
+ var galleries []gallery.Gallery
+ if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
+ log.Error().Err(err).Msg("unable to load galleries")
+ }
+
+ models, err := gallery.AvailableGalleryModels(galleries, ml.ModelsPath)
+ if err != nil {
+ return err
+ }
+ for _, model := range models {
+ if model.Installed {
+ fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name)
+ } else {
+ fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name)
+ }
+ }
+ return nil
+}
+
+func (mi *ModelsInstall) Run(ctx *Context) error {
+ modelName := mi.ModelArgs[0]
+
+ var galleries []gallery.Gallery
+ if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
+ log.Error().Err(err).Msg("unable to load galleries")
+ }
+
+ progressBar := progressbar.NewOptions(
+ 1000,
+ progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
+ progressbar.OptionShowBytes(false),
+ progressbar.OptionClearOnFinish(),
+ )
+ progressCallback := func(fileName string, current string, total string, percentage float64) {
+ progressBar.Set(int(percentage * 10))
+ }
+ err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback)
+ if err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/core/cli/run.go b/core/cli/run.go
new file mode 100644
index 00000000..09d09979
--- /dev/null
+++ b/core/cli/run.go
@@ -0,0 +1,155 @@
+package cli
+
+import (
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http"
+ "github.com/go-skynet/LocalAI/core/startup"
+ "github.com/rs/zerolog/log"
+)
+
+type RunCMD struct {
+ ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
+
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+ BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
+ AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
+ UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
+ ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
+ LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
+ // The alias on this option is there to preserve functionality with the old `--config-file` parameter
+ ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
+
+ Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
+ AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
+ RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
+ PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
+ Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
+ PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
+
+ F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
+ Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
+ ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
+
+ Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
+ CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
+ CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
+ UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
+ APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
+ DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"`
+
+ ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
+ SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
+ PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
+ ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
+ EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
+ WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
+ EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
+ WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
+}
+
+func (r *RunCMD) Run(ctx *Context) error {
+ opts := []config.AppOption{
+ config.WithConfigFile(r.ModelsConfigFile),
+ config.WithJSONStringPreload(r.PreloadModels),
+ config.WithYAMLConfigPreload(r.PreloadModelsConfig),
+ config.WithModelPath(r.ModelsPath),
+ config.WithContextSize(r.ContextSize),
+ config.WithDebug(ctx.Debug),
+ config.WithImageDir(r.ImagePath),
+ config.WithAudioDir(r.AudioPath),
+ config.WithUploadDir(r.UploadPath),
+ config.WithConfigsDir(r.ConfigPath),
+ config.WithF16(r.F16),
+ config.WithStringGalleries(r.Galleries),
+ config.WithModelLibraryURL(r.RemoteLibrary),
+ config.WithDisableMessage(false),
+ config.WithCors(r.CORS),
+ config.WithCorsAllowOrigins(r.CORSAllowOrigins),
+ config.WithThreads(r.Threads),
+ config.WithBackendAssets(ctx.BackendAssets),
+ config.WithBackendAssetsOutput(r.BackendAssetsPath),
+ config.WithUploadLimitMB(r.UploadLimit),
+ config.WithApiKeys(r.APIKeys),
+ config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
+ }
+
+ idleWatchDog := r.EnableWatchdogIdle
+ busyWatchDog := r.EnableWatchdogBusy
+
+ if r.DisableWelcome {
+ opts = append(opts, config.DisableWelcomePage)
+ }
+
+ if idleWatchDog || busyWatchDog {
+ opts = append(opts, config.EnableWatchDog)
+ if idleWatchDog {
+ opts = append(opts, config.EnableWatchDogIdleCheck)
+ dur, err := time.ParseDuration(r.WatchdogIdleTimeout)
+ if err != nil {
+ return err
+ }
+ opts = append(opts, config.SetWatchDogIdleTimeout(dur))
+ }
+ if busyWatchDog {
+ opts = append(opts, config.EnableWatchDogBusyCheck)
+ dur, err := time.ParseDuration(r.WatchdogBusyTimeout)
+ if err != nil {
+ return err
+ }
+ opts = append(opts, config.SetWatchDogBusyTimeout(dur))
+ }
+ }
+ if r.ParallelRequests {
+ opts = append(opts, config.EnableParallelBackendRequests)
+ }
+ if r.SingleActiveBackend {
+ opts = append(opts, config.EnableSingleBackend)
+ }
+
+ // split ":" to get backend name and the uri
+ for _, v := range r.ExternalGRPCBackends {
+ backend := v[:strings.IndexByte(v, ':')]
+ uri := v[strings.IndexByte(v, ':')+1:]
+ opts = append(opts, config.WithExternalBackend(backend, uri))
+ }
+
+ if r.AutoloadGalleries {
+ opts = append(opts, config.EnableGalleriesAutoload)
+ }
+
+ if r.PreloadBackendOnly {
+ _, _, _, err := startup.Startup(opts...)
+ return err
+ }
+
+ cl, ml, options, err := startup.Startup(opts...)
+
+ if err != nil {
+ return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
+ }
+
+ // Watch the configuration directory
+ // If the directory does not exist, we don't watch it
+ if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
+ closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
+ defer closeConfigWatcherFn()
+
+ if err != nil {
+ return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir)
+ }
+ }
+
+ appHTTP, err := http.App(cl, ml, options)
+ if err != nil {
+ log.Error().Err(err).Msg("error during HTTP App construction")
+ return err
+ }
+
+ return appHTTP.Listen(r.Address)
+}
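One sharp edge in the backend-splitting loop above: `strings.IndexByte` returns -1 when an entry contains no colon, which would make the slice expressions panic on malformed input. A more defensive variant using `strings.Cut` (a sketch, not the committed code):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Entries follow "name:uri", where the uri itself may contain
	// colons (e.g. host:port), so we split on the first colon only.
	entries := []string{
		"my-backend:127.0.0.1:9000",
		"my-backend2:/usr/bin/backend.py",
		"malformed-entry",
	}
	for _, v := range entries {
		name, uri, ok := strings.Cut(v, ":")
		if !ok {
			fmt.Println("skipping malformed backend entry:", v)
			continue
		}
		fmt.Printf("backend %q -> %q\n", name, uri)
	}
}
```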
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
new file mode 100644
index 00000000..9f36a77c
--- /dev/null
+++ b/core/cli/transcript.go
@@ -0,0 +1,54 @@
+package cli
+
+import (
+ "context"
+ "errors"
+ "fmt"
+
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+type TranscriptCMD struct {
+ Filename string `arg:""`
+
+ Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
+ Model string `short:"m" required:"" help:"Model name to run the transcription"`
+ Language string `short:"l" help:"Language of the audio file"`
+ Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+ BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+}
+
+func (t *TranscriptCMD) Run(ctx *Context) error {
+ opts := &config.ApplicationConfig{
+ ModelPath: t.ModelsPath,
+ Context: context.Background(),
+ AssetsDestination: t.BackendAssetsPath,
+ }
+
+ cl := config.NewBackendConfigLoader()
+ ml := model.NewModelLoader(opts.ModelPath)
+ if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil {
+ return err
+ }
+
+ c, exists := cl.GetBackendConfig(t.Model)
+ if !exists {
+ return errors.New("model not found")
+ }
+
+ c.Threads = &t.Threads
+
+ defer ml.StopAllGRPC()
+
+ tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
+ if err != nil {
+ return err
+ }
+ for _, segment := range tr.Segments {
+ fmt.Println(segment.Start.String(), "-", segment.Text)
+ }
+ return nil
+}
diff --git a/core/cli/tts.go b/core/cli/tts.go
new file mode 100644
index 00000000..1d8fd3a3
--- /dev/null
+++ b/core/cli/tts.go
@@ -0,0 +1,61 @@
+package cli
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+type TTSCMD struct {
+ Text []string `arg:""`
+
+ Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
+ Model string `short:"m" required:"" help:"Model name to run the TTS"`
+ Voice string `short:"v" help:"Voice name to run the TTS"`
+ OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+ BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+}
+
+func (t *TTSCMD) Run(ctx *Context) error {
+ outputFile := t.OutputFile
+ outputDir := t.BackendAssetsPath
+ if outputFile != "" {
+ outputDir = filepath.Dir(outputFile)
+ }
+
+ text := strings.Join(t.Text, " ")
+
+ opts := &config.ApplicationConfig{
+ ModelPath: t.ModelsPath,
+ Context: context.Background(),
+ AudioDir: outputDir,
+ AssetsDestination: t.BackendAssetsPath,
+ }
+ ml := model.NewModelLoader(opts.ModelPath)
+
+ defer ml.StopAllGRPC()
+
+ options := config.BackendConfig{}
+ options.SetDefaults()
+
+ filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
+ if err != nil {
+ return err
+ }
+ if outputFile != "" {
+ if err := os.Rename(filePath, outputFile); err != nil {
+ return err
+ }
+ fmt.Printf("Generate file %s\n", outputFile)
+ } else {
+ fmt.Printf("Generate file %s\n", filePath)
+ }
+ return nil
+}
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index c9926bab..dace5803 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -382,35 +382,84 @@ docker run --env-file .env localai
### CLI parameters
-You can control LocalAI with command line arguments, to specify a binding address, or the number of threads.
+You can control LocalAI with command line arguments to specify, for example, the bind address or the number of threads. Any command line parameter can also be specified via an environment variable.
+#### Global Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| -h, --help | | Show context-sensitive help. | |
+| --log-level | info | Set the level of logs to output [error,warn,info,debug] | $LOCALAI_LOG_LEVEL |
-| Parameter | Environmental Variable | Default Variable | Description |
-| ------------------------------ | ------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------- |
-| --f16 | $F16 | false | Enable f16 mode |
-| --debug | $DEBUG | false | Enable debug mode |
-| --cors | $CORS | false | Enable CORS support |
-| --cors-allow-origins value | $CORS_ALLOW_ORIGINS | | Specify origins allowed for CORS |
-| --threads value | $THREADS | 4 | Number of threads to use for parallel computation |
-| --models-path value | $MODELS_PATH | ./models | Path to the directory containing models used for inferencing |
-| --preload-models value | $PRELOAD_MODELS | | List of models to preload in JSON format at startup |
-| --preload-models-config value | $PRELOAD_MODELS_CONFIG | | A config with a list of models to apply at startup. Specify the path to a YAML config file |
-| --config-file value | $CONFIG_FILE | | Path to the config file |
-| --address value | $ADDRESS | :8080 | Specify the bind address for the API server |
-| --image-path value | $IMAGE_PATH | | Path to the directory used to store generated images |
-| --context-size value | $CONTEXT_SIZE | 512 | Default context size of the model |
-| --upload-limit value | $UPLOAD_LIMIT | 15 | Default upload limit in megabytes (audio file upload) |
-| --galleries | $GALLERIES | | Allows to set galleries from command line |
-|--parallel-requests | $PARALLEL_REQUESTS | false | Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm |
-| --single-active-backend | $SINGLE_ACTIVE_BACKEND | false | Allow only one backend to be running |
-| --api-keys value | $API_KEY | empty | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.
-| --enable-watchdog-idle | $WATCHDOG_IDLE | false | Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long. (default: false) [$WATCHDOG_IDLE]
-| --enable-watchdog-busy | $WATCHDOG_BUSY | false | Enable watchdog for stopping busy backends that exceed a defined threshold.|
-| --watchdog-busy-timeout value | $WATCHDOG_BUSY_TIMEOUT | 5m | Watchdog timeout. This will restart the backend if it crashes. |
-| --watchdog-idle-timeout value | $WATCHDOG_IDLE_TIMEOUT | 15m | Watchdog idle timeout. This will restart the backend if it crashes. |
-| --preload-backend-only | $PRELOAD_BACKEND_ONLY | false | If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups. |
-| --external-grpc-backends | EXTERNAL_GRPC_BACKENDS | none | Comma separated list of external gRPC backends to use. Format: `name:host:port` or `name:/path/to/file` |
+#### Storage Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
+| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
+| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
+| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
+| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
+| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
+| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
+| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE |
+#### Models Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| --galleries | STRING | JSON list of galleries | $LOCALAI_GALLERIES |
+| --autoload-galleries | | | $LOCALAI_AUTOLOAD_GALLERIES |
+| --remote-library | "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" | A LocalAI remote library URL | $LOCALAI_REMOTE_LIBRARY |
+| --preload-models | STRING | A List of models to apply in JSON at start |$LOCALAI_PRELOAD_MODELS |
+| --models | MODELS,... | A List of model configuration URLs to load | $LOCALAI_MODELS |
+| --preload-models-config | STRING | A List of models to apply at startup. Path to a YAML config file | $LOCALAI_PRELOAD_MODELS_CONFIG |
+
+#### Performance Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| --f16 | | Enable GPU acceleration | $LOCALAI_F16 |
+| -t, --threads | 4 | Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested | $LOCALAI_THREADS |
+| --context-size | 512 | Default context size for models | $LOCALAI_CONTEXT_SIZE |
+
+#### API Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| --address | ":8080" | Bind address for the API server | $LOCALAI_ADDRESS |
+| --cors | | | $LOCALAI_CORS |
+| --cors-allow-origins | | | $LOCALAI_CORS_ALLOW_ORIGINS |
+| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
+| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all requests must be authenticated with one of these API keys (see the example below) | $LOCALAI_API_KEY |
+| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
+
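+Multiple API keys can be supplied at once; assuming list flags accept comma-separated values from the environment (as the `API-KEYS,...` placeholder suggests), a hypothetical configuration would be:
+
+```
+LOCALAI_API_KEY=first-secret-key,second-secret-key
+```
+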
+#### Backend Flags
+| Parameter | Default | Description | Environment Variable |
+|-----------|---------|-------------|----------------------|
+| --parallel-requests | | Enable backends to handle multiple requests in parallel if they support it (e.g. llama.cpp or vllm) | $LOCALAI_PARALLEL_REQUESTS |
+| --single-active-backend | | Allow only one backend to be run at a time | $LOCALAI_SINGLE_ACTIVE_BACKEND |
+| --preload-backend-only | | Do not launch the API services; only start the preloaded models / backends (useful for multi-node setups) | $LOCALAI_PRELOAD_BACKEND_ONLY |
+| --external-grpc-backends | EXTERNAL-GRPC-BACKENDS,... | A list of external gRPC backends, in the format `name:host:port` or `name:/path/to/file` | $LOCALAI_EXTERNAL_GRPC_BACKENDS |
+| --enable-watchdog-idle | | Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout (see the example below) | $LOCALAI_WATCHDOG_IDLE |
+| --watchdog-idle-timeout | 15m | Threshold beyond which an idle backend should be stopped | $LOCALAI_WATCHDOG_IDLE_TIMEOUT, $WATCHDOG_IDLE_TIMEOUT |
+| --enable-watchdog-busy | | Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout | $LOCALAI_WATCHDOG_BUSY |
+| --watchdog-busy-timeout | 5m | Threshold beyond which a busy backend should be stopped | $LOCALAI_WATCHDOG_BUSY_TIMEOUT |
+
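+For example, to have the watchdog stop any backend that stays idle for more than 10 minutes, the relevant flags can be set together (the values here are illustrative):
+
+```
+LOCALAI_WATCHDOG_IDLE=true
+LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m
+```
+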
+### .env files
+
+Any setting that can be provided via an environment variable can also be provided from within .env files. Several locations are checked for relevant .env files; in order of precedence, they are:
+
+- .env within the current directory
+- localai.env within the current directory
+- localai.env within the home directory
+- .config/localai.env within the home directory
+- /etc/localai.env
+
+Variables defined in files earlier in the list take precedence over the same variables defined in files later in the list.
+
+An example .env file is:
+
+```
+LOCALAI_THREADS=10
+LOCALAI_MODELS_PATH=/mnt/storage/localai/models
+LOCALAI_F16=true
+```
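+
+The precedence comes directly from how the files are loaded: they are read in the order listed above, and `godotenv.Load` never overwrites a variable that is already set, so the first file to define a variable wins. A minimal Go sketch of this pattern, mirroring the loading loop in `main.go`:
+
+```go
+package main
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/joho/godotenv"
+)
+
+func main() {
+	// Highest-precedence locations first.
+	envFiles := []string{".env", "localai.env"}
+	if home, err := os.UserHomeDir(); err == nil {
+		envFiles = append(envFiles,
+			filepath.Join(home, "localai.env"),
+			filepath.Join(home, ".config/localai.env"))
+	}
+	envFiles = append(envFiles, "/etc/localai.env")
+
+	for _, f := range envFiles {
+		if _, err := os.Stat(f); err == nil {
+			// godotenv.Load does not overwrite a variable that is
+			// already set, so earlier files take precedence.
+			_ = godotenv.Load(f)
+		}
+	}
+}
+```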
### Extra backends
diff --git a/go.mod b/go.mod
index 4dd207c7..fac7acfd 100644
--- a/go.mod
+++ b/go.mod
@@ -13,8 +13,8 @@ require (
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
github.com/gofiber/fiber/v2 v2.52.0
+ github.com/gofiber/swagger v1.0.0
github.com/gofiber/template/html/v2 v2.1.1
- github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47
github.com/google/uuid v1.5.0
github.com/hashicorp/go-multierror v1.1.1
github.com/hpcloud/tail v1.0.0
@@ -30,11 +30,12 @@ require (
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
github.com/rs/zerolog v1.31.0
+ github.com/russross/blackfriday v1.6.0
github.com/sashabaranov/go-openai v1.20.4
github.com/schollz/progressbar/v3 v3.13.1
github.com/stretchr/testify v1.9.0
+ github.com/swaggo/swag v1.16.3
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
- github.com/urfave/cli/v2 v2.27.1
github.com/valyala/fasthttp v1.51.0
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/prometheus v0.42.0
@@ -64,8 +65,6 @@ require (
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
- github.com/PuerkitoBio/purell v1.2.1 // indirect
- github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
@@ -85,7 +84,6 @@ require (
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
- github.com/gofiber/swagger v1.0.0 // indirect
github.com/gofiber/template v1.8.3 // indirect
github.com/gofiber/utils v1.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
@@ -119,12 +117,10 @@ require (
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
- github.com/russross/blackfriday v1.6.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/cast v1.3.1 // indirect
github.com/swaggo/files/v2 v2.0.0 // indirect
- github.com/swaggo/swag v1.16.3 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
@@ -140,12 +136,11 @@ require (
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
- sigs.k8s.io/yaml v1.4.0 // indirect
)
require (
+ github.com/alecthomas/kong v0.9.0
github.com/andybalholm/brotli v1.0.5 // indirect
- github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/go-audio/audio v1.0.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
@@ -153,16 +148,15 @@ require (
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
+ github.com/joho/godotenv v1.5.1
github.com/klauspost/compress v1.17.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
github.com/rivo/uniseg v0.2.0 // indirect
- github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
- github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
diff --git a/go.sum b/go.sum
index f81f10c8..dc08c465 100644
--- a/go.sum
+++ b/go.sum
@@ -13,12 +13,14 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
-github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28=
-github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo=
-github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
-github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
+github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
+github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA=
+github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os=
+github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
+github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
@@ -45,10 +47,6 @@ github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1A
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
-github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
-github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
-github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
-github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
@@ -95,8 +93,6 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
-github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
-github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
@@ -104,8 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
-github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE=
github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
@@ -131,8 +125,6 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k=
-github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -147,8 +139,6 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
-github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
@@ -157,6 +147,8 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
+github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
@@ -165,6 +157,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
@@ -172,8 +166,6 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
-github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@@ -198,7 +190,6 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
@@ -296,8 +287,6 @@ github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWR
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
-github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
@@ -328,7 +317,6 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
@@ -349,14 +337,8 @@ github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
-github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
-github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
-github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
-github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M=
-github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
@@ -371,10 +353,6 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
-github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw=
-github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
@@ -401,15 +379,11 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
-golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
-golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
-golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -425,8 +399,6 @@ golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
-golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
-golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -435,9 +407,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
-golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -471,16 +442,12 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
-golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
-golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
-golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -488,8 +455,6 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
-golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -499,8 +464,6 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
-golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -541,5 +504,3 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
-sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
-sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/main.go b/main.go
index 53966ba5..8b5696d1 100644
--- a/main.go
+++ b/main.go
@@ -1,41 +1,30 @@
package main
import (
- "context"
- "encoding/json"
- "errors"
- "fmt"
"os"
"os/signal"
"path/filepath"
- "strings"
"syscall"
- "time"
-
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
-
- "github.com/go-skynet/LocalAI/core/http"
- "github.com/go-skynet/LocalAI/core/startup"
+ "github.com/alecthomas/kong"
+ "github.com/go-skynet/LocalAI/core/cli"
"github.com/go-skynet/LocalAI/internal"
- "github.com/go-skynet/LocalAI/pkg/gallery"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/joho/godotenv"
+
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
- progressbar "github.com/schollz/progressbar/v3"
- "github.com/urfave/cli/v2"
_ "github.com/go-skynet/LocalAI/swagger"
)
-const (
- remoteLibraryURL = "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml"
-)
-
func main() {
+ var err error
+
+	// Initialize zerolog at a level of INFO; we will set the desired level after we parse the CLI options
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
- // clean up process
+ zerolog.SetGlobalLevel(zerolog.InfoLevel)
+
+ // Catch signals from the OS requesting us to exit
go func() {
c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
@@ -43,511 +32,83 @@ func main() {
os.Exit(1)
}()
- path, err := os.Getwd()
- if err != nil {
- log.Error().Err(err).Msg("failed to get current directory")
- os.Exit(1)
+	// handle loading environment variables from .env files
+ envFiles := []string{".env", "localai.env"}
+ homeDir, err := os.UserHomeDir()
+ if err == nil {
+ envFiles = append(envFiles, filepath.Join(homeDir, "localai.env"), filepath.Join(homeDir, ".config/localai.env"))
+ }
+ envFiles = append(envFiles, "/etc/localai.env")
+
+ for _, envFile := range envFiles {
+ if _, err := os.Stat(envFile); err == nil {
+ log.Info().Str("envFile", envFile).Msg("loading environment variables from file")
+ godotenv.Load(envFile)
+ }
}
- app := &cli.App{
- Name: "LocalAI",
- Version: internal.PrintableVersion(),
- Usage: "OpenAI, OSS alternative. Drop-in compatible API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware. Supported server endpoints: OpenAI, Elevenlabs",
- Flags: []cli.Flag{
- &cli.BoolFlag{
- Name: "f16",
- EnvVars: []string{"F16"},
- },
- &cli.BoolFlag{
- Name: "autoload-galleries",
- EnvVars: []string{"AUTOLOAD_GALLERIES"},
- },
- &cli.BoolFlag{
- Name: "debug",
- EnvVars: []string{"DEBUG"},
- },
- &cli.BoolFlag{
- Name: "single-active-backend",
- EnvVars: []string{"SINGLE_ACTIVE_BACKEND"},
- Usage: "Allow only one backend to be running.",
- },
- &cli.BoolFlag{
- Name: "parallel-requests",
- EnvVars: []string{"PARALLEL_REQUESTS"},
- Usage: "Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm",
- },
- &cli.BoolFlag{
- Name: "cors",
- EnvVars: []string{"CORS"},
- },
- &cli.StringFlag{
- Name: "cors-allow-origins",
- EnvVars: []string{"CORS_ALLOW_ORIGINS"},
- },
- &cli.IntFlag{
- Name: "threads",
- Usage: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
- EnvVars: []string{"THREADS"},
- Value: 4,
- },
- &cli.StringFlag{
- Name: "models-path",
- Usage: "Path containing models used for inferencing",
- EnvVars: []string{"MODELS_PATH"},
- Value: filepath.Join(path, "models"),
- },
- &cli.StringFlag{
- Name: "galleries",
- Usage: "JSON list of galleries",
- EnvVars: []string{"GALLERIES"},
- },
- &cli.StringFlag{
- Name: "remote-library",
- Usage: "A LocalAI remote library URL",
- EnvVars: []string{"REMOTE_LIBRARY"},
- Value: remoteLibraryURL,
- },
- &cli.StringFlag{
- Name: "preload-models",
- Usage: "A List of models to apply in JSON at start",
- EnvVars: []string{"PRELOAD_MODELS"},
- },
- &cli.StringSliceFlag{
- Name: "models",
- Usage: "A List of models URLs configurations.",
- EnvVars: []string{"MODELS"},
- },
- &cli.StringFlag{
- Name: "preload-models-config",
- Usage: "A List of models to apply at startup. Path to a YAML config file",
- EnvVars: []string{"PRELOAD_MODELS_CONFIG"},
- },
- &cli.StringFlag{
- Name: "config-file",
- Usage: "Config file",
- EnvVars: []string{"CONFIG_FILE"},
- },
- &cli.StringFlag{
- Name: "address",
- Usage: "Bind address for the API server.",
- EnvVars: []string{"ADDRESS"},
- Value: ":8080",
- },
- &cli.StringFlag{
- Name: "image-path",
- Usage: "Image directory",
- EnvVars: []string{"IMAGE_PATH"},
- Value: "/tmp/generated/images",
- },
- &cli.StringFlag{
- Name: "audio-path",
- Usage: "audio directory",
- EnvVars: []string{"AUDIO_PATH"},
- Value: "/tmp/generated/audio",
- },
- &cli.StringFlag{
- Name: "upload-path",
- Usage: "Path to store uploads from files api",
- EnvVars: []string{"UPLOAD_PATH"},
- Value: "/tmp/localai/upload",
- },
- &cli.StringFlag{
- Name: "config-path",
- Usage: "Path to store uploads from files api",
- EnvVars: []string{"CONFIG_PATH"},
- Value: "/tmp/localai/config",
- },
- &cli.StringFlag{
- Name: "backend-assets-path",
- Usage: "Path used to extract libraries that are required by some of the backends in runtime.",
- EnvVars: []string{"BACKEND_ASSETS_PATH"},
- Value: "/tmp/localai/backend_data",
- },
- &cli.StringSliceFlag{
- Name: "external-grpc-backends",
- Usage: "A list of external grpc backends",
- EnvVars: []string{"EXTERNAL_GRPC_BACKENDS"},
- },
- &cli.IntFlag{
- Name: "context-size",
- Usage: "Default context size of the model",
- EnvVars: []string{"CONTEXT_SIZE"},
- Value: 512,
- },
- &cli.IntFlag{
- Name: "upload-limit",
- Usage: "Default upload-limit. MB",
- EnvVars: []string{"UPLOAD_LIMIT"},
- Value: 15,
- },
- &cli.StringSliceFlag{
- Name: "api-keys",
- Usage: "List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.",
- EnvVars: []string{"API_KEY"},
- },
- &cli.BoolFlag{
- Name: "enable-watchdog-idle",
- Usage: "Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long.",
- EnvVars: []string{"WATCHDOG_IDLE"},
- Value: false,
- },
- &cli.BoolFlag{
- Name: "disable-welcome",
- Usage: "Disable welcome pages",
- EnvVars: []string{"DISABLE_WELCOME"},
- Value: false,
- },
- &cli.BoolFlag{
- Name: "enable-watchdog-busy",
- Usage: "Enable watchdog for stopping busy backends that exceed a defined threshold.",
- EnvVars: []string{"WATCHDOG_BUSY"},
- Value: false,
- },
- &cli.StringFlag{
- Name: "watchdog-busy-timeout",
- Usage: "Watchdog timeout. This will restart the backend if it crashes.",
- EnvVars: []string{"WATCHDOG_BUSY_TIMEOUT"},
- Value: "5m",
- },
- &cli.StringFlag{
- Name: "watchdog-idle-timeout",
- Usage: "Watchdog idle timeout. This will restart the backend if it crashes.",
- EnvVars: []string{"WATCHDOG_IDLE_TIMEOUT"},
- Value: "15m",
- },
- &cli.BoolFlag{
- Name: "preload-backend-only",
- Usage: "If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups.",
- EnvVars: []string{"PRELOAD_BACKEND_ONLY"},
- Value: false,
- },
- &cli.StringFlag{
- Name: "localai-config-dir",
- Usage: "Directory to use for the configuration files of LocalAI itself. This is NOT where model files should be placed.",
- EnvVars: []string{"LOCALAI_CONFIG_DIR"},
- Value: "./configuration",
- },
- },
- Description: `
-LocalAI is a drop-in replacement OpenAI API which runs inference locally.
+ // Actually parse the CLI options
+ ctx := kong.Parse(&cli.CLI,
+ kong.Description(
+ ` LocalAI is a drop-in replacement OpenAI API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware.
Some of the models compatible are:
-- Vicuna
-- Koala
-- GPT4ALL
-- GPT4ALL-J
-- Cerebras
-- Alpaca
-- StableLM (ggml quantized)
+ - Vicuna
+ - Koala
+ - GPT4ALL
+ - GPT4ALL-J
+ - Cerebras
+ - Alpaca
+ - StableLM (ggml quantized)
-For a list of compatible model, check out: https://localai.io/model-compatibility/index.html
+For a list of compatible models, check out: https://localai.io/model-compatibility/index.html
+
+Copyright: Ettore Di Giacinto
+
+Version: ${version}
`,
- UsageText: `local-ai [options]`,
- Copyright: "Ettore Di Giacinto",
- Action: func(ctx *cli.Context) error {
- opts := []config.AppOption{
- config.WithConfigFile(ctx.String("config-file")),
- config.WithJSONStringPreload(ctx.String("preload-models")),
- config.WithYAMLConfigPreload(ctx.String("preload-models-config")),
- config.WithModelPath(ctx.String("models-path")),
- config.WithContextSize(ctx.Int("context-size")),
- config.WithDebug(ctx.Bool("debug")),
- config.WithImageDir(ctx.String("image-path")),
- config.WithAudioDir(ctx.String("audio-path")),
- config.WithUploadDir(ctx.String("upload-path")),
- config.WithConfigsDir(ctx.String("config-path")),
- config.WithF16(ctx.Bool("f16")),
- config.WithStringGalleries(ctx.String("galleries")),
- config.WithModelLibraryURL(ctx.String("remote-library")),
- config.WithDisableMessage(false),
- config.WithCors(ctx.Bool("cors")),
- config.WithCorsAllowOrigins(ctx.String("cors-allow-origins")),
- config.WithThreads(ctx.Int("threads")),
- config.WithBackendAssets(backendAssets),
- config.WithBackendAssetsOutput(ctx.String("backend-assets-path")),
- config.WithUploadLimitMB(ctx.Int("upload-limit")),
- config.WithApiKeys(ctx.StringSlice("api-keys")),
- config.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...),
- }
-
- idleWatchDog := ctx.Bool("enable-watchdog-idle")
- busyWatchDog := ctx.Bool("enable-watchdog-busy")
-
- if ctx.Bool("disable-welcome") {
- opts = append(opts, config.DisableWelcomePage)
- }
-
- if idleWatchDog || busyWatchDog {
- opts = append(opts, config.EnableWatchDog)
- if idleWatchDog {
- opts = append(opts, config.EnableWatchDogIdleCheck)
- dur, err := time.ParseDuration(ctx.String("watchdog-idle-timeout"))
- if err != nil {
- return err
- }
- opts = append(opts, config.SetWatchDogIdleTimeout(dur))
- }
- if busyWatchDog {
- opts = append(opts, config.EnableWatchDogBusyCheck)
- dur, err := time.ParseDuration(ctx.String("watchdog-busy-timeout"))
- if err != nil {
- return err
- }
- opts = append(opts, config.SetWatchDogBusyTimeout(dur))
- }
- }
- if ctx.Bool("parallel-requests") {
- opts = append(opts, config.EnableParallelBackendRequests)
- }
- if ctx.Bool("single-active-backend") {
- opts = append(opts, config.EnableSingleBackend)
- }
-
- externalgRPC := ctx.StringSlice("external-grpc-backends")
- // split ":" to get backend name and the uri
- for _, v := range externalgRPC {
- backend := v[:strings.IndexByte(v, ':')]
- uri := v[strings.IndexByte(v, ':')+1:]
- opts = append(opts, config.WithExternalBackend(backend, uri))
- }
-
- if ctx.Bool("autoload-galleries") {
- opts = append(opts, config.EnableGalleriesAutoload)
- }
-
- if ctx.Bool("preload-backend-only") {
- _, _, _, err := startup.Startup(opts...)
- return err
- }
-
- cl, ml, options, err := startup.Startup(opts...)
-
- if err != nil {
- return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
- }
-
- configdir := ctx.String("localai-config-dir")
- // Watch the configuration directory
- // If the directory does not exist, we don't watch it
- if _, err := os.Stat(configdir); err == nil {
- closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
- defer closeConfigWatcherFn()
-
- if err != nil {
- return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
- }
- }
-
- appHTTP, err := http.App(cl, ml, options)
- if err != nil {
- log.Error().Err(err).Msg("error during HTTP App construction")
- return err
- }
-
- return appHTTP.Listen(ctx.String("address"))
+ ),
+ kong.UsageOnError(),
+ kong.Vars{
+ "basepath": kong.ExpandPath("."),
+ "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
+ "version": internal.PrintableVersion(),
},
- Commands: []*cli.Command{
- {
- Name: "models",
- Usage: "List or install models",
- Subcommands: []*cli.Command{
- {
- Name: "list",
- Usage: "List the models avaiable in your galleries",
- Action: func(ctx *cli.Context) error {
- var galleries []gallery.Gallery
- if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
- log.Error().Err(err).Msg("unable to load galleries")
- }
+ )
- models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path"))
- if err != nil {
- return err
- }
- for _, model := range models {
- if model.Installed {
- fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name)
- } else {
- fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name)
- }
- }
- return nil
- },
- },
- {
- Name: "install",
- Usage: "Install a model from the gallery",
- Action: func(ctx *cli.Context) error {
- modelName := ctx.Args().First()
-
- var galleries []gallery.Gallery
- if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
- log.Error().Err(err).Msg("unable to load galleries")
- }
-
- progressBar := progressbar.NewOptions(
- 1000,
- progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
- progressbar.OptionShowBytes(false),
- progressbar.OptionClearOnFinish(),
- )
- progressCallback := func(fileName string, current string, total string, percentage float64) {
- progressBar.Set(int(percentage * 10))
- }
- err = gallery.InstallModelFromGallery(galleries, modelName, ctx.String("models-path"), gallery.GalleryModel{}, progressCallback)
- if err != nil {
- return err
- }
- return nil
- },
- },
- },
- },
- {
- Name: "tts",
- Usage: "Convert text to speech",
- Flags: []cli.Flag{
- &cli.StringFlag{
- Name: "backend",
- Value: "piper",
- Aliases: []string{"b"},
- Usage: "Backend to run the TTS model",
- },
- &cli.StringFlag{
- Name: "model",
- Aliases: []string{"m"},
- Usage: "Model name to run the TTS",
- Required: true,
- },
- &cli.StringFlag{
- Name: "voice",
- Aliases: []string{"v"},
- Usage: "Voice name to run the TTS (optional)",
- Required: true,
- },
- &cli.StringFlag{
- Name: "output-file",
- Aliases: []string{"o"},
- Usage: "The path to write the output wav file",
- },
- },
- Action: func(ctx *cli.Context) error {
- modelOption := ctx.String("model")
- if modelOption == "" {
- return errors.New("--model parameter is required")
- }
- backendOption := ctx.String("backend")
- if backendOption == "" {
- backendOption = "piper"
- }
- outputFile := ctx.String("output-file")
- outputDir := ctx.String("backend-assets-path")
- if outputFile != "" {
- outputDir = filepath.Dir(outputFile)
- }
-
- text := strings.Join(ctx.Args().Slice(), " ")
-
- opts := &config.ApplicationConfig{
- ModelPath: ctx.String("models-path"),
- Context: context.Background(),
- AudioDir: outputDir,
- AssetsDestination: ctx.String("backend-assets-path"),
- }
- ml := model.NewModelLoader(opts.ModelPath)
-
- defer ml.StopAllGRPC()
-
- filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ctx.String("voice"), ml, opts, config.BackendConfig{})
- if err != nil {
- return err
- }
- if outputFile != "" {
- if err := os.Rename(filePath, outputFile); err != nil {
- return err
- }
- fmt.Printf("Generate file %s\n", outputFile)
- } else {
- fmt.Printf("Generate file %s\n", filePath)
- }
- return nil
- },
- },
- {
- Name: "transcript",
- Usage: "Convert audio to text",
- Flags: []cli.Flag{
- &cli.StringFlag{
- Name: "backend",
- Value: "whisper",
- Aliases: []string{"b"},
- Usage: "Backend to run the transcription model",
- },
- &cli.StringFlag{
- Name: "model",
- Aliases: []string{"m"},
- Usage: "Model name to run the transcription",
- },
- &cli.StringFlag{
- Name: "language",
- Aliases: []string{"l"},
- Usage: "Language of the audio file",
- },
- &cli.IntFlag{
- Name: "threads",
- Aliases: []string{"t"},
- Usage: "Threads to use",
- Value: 1,
- },
- &cli.StringFlag{
- Name: "output-file",
- Aliases: []string{"o"},
- Usage: "The path to write the output wav file",
- },
- },
- Action: func(ctx *cli.Context) error {
- modelOption := ctx.String("model")
- filename := ctx.Args().First()
- language := ctx.String("language")
- threads := ctx.Int("threads")
-
- opts := &config.ApplicationConfig{
- ModelPath: ctx.String("models-path"),
- Context: context.Background(),
- AssetsDestination: ctx.String("backend-assets-path"),
- }
-
- cl := config.NewBackendConfigLoader()
- ml := model.NewModelLoader(opts.ModelPath)
- if err := cl.LoadBackendConfigsFromPath(ctx.String("models-path")); err != nil {
- return err
- }
-
- c, exists := cl.GetBackendConfig(modelOption)
- if !exists {
- return errors.New("model not found")
- }
-
- c.Threads = &threads
-
- defer ml.StopAllGRPC()
-
- tr, err := backend.ModelTranscription(filename, language, ml, c, opts)
- if err != nil {
- return err
- }
- for _, segment := range tr.Segments {
- fmt.Println(segment.Start.String(), "-", segment.Text)
- }
- return nil
- },
- },
- },
+ // Configure the logging level before we run the application
+ // This is here to preserve the existing --debug flag functionality
+ logLevel := "info"
+ if cli.CLI.Debug && cli.CLI.LogLevel == nil {
+ logLevel = "debug"
+ zerolog.SetGlobalLevel(zerolog.DebugLevel)
+ cli.CLI.LogLevel = &logLevel
}
- err = app.Run(os.Args)
- if err != nil {
- log.Error().Err(err).Msg("application runtime error")
- os.Exit(1)
+ if cli.CLI.LogLevel == nil {
+ cli.CLI.LogLevel = &logLevel
}
+
+ switch *cli.CLI.LogLevel {
+ case "error":
+ log.Info().Msg("Setting logging to error")
+ zerolog.SetGlobalLevel(zerolog.ErrorLevel)
+ case "warn":
+ log.Info().Msg("Setting logging to warn")
+ zerolog.SetGlobalLevel(zerolog.WarnLevel)
+ case "info":
+ log.Info().Msg("Setting logging to info")
+ zerolog.SetGlobalLevel(zerolog.InfoLevel)
+ case "debug":
+ log.Info().Msg("Setting logging to debug")
+ zerolog.SetGlobalLevel(zerolog.DebugLevel)
+ }
+
+ // Populate the application with the embedded backend assets
+ cli.CLI.Context.BackendAssets = backendAssets
+
+ // Run the thing!
+ err = ctx.Run(&cli.CLI.Context)
+
+ ctx.FatalIfErrorf(err)
}
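
The heart of this patch is the switch from urfave/cli's imperative flag definitions to a declarative kong grammar: struct tags carry the flag name, default, env var binding, and help text in one place, which is also where the `LOCALAI_`-prefixed variables in the updated docs tables come from. A rough, self-contained sketch of the pattern (the field names and tags here are illustrative, not the actual `core/cli` definitions):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/kong"
)

// CLI is a hypothetical grammar: kong derives --threads, --models-path
// and --address flags from the fields, binds them to env vars, and
// interpolates ${basepath} from kong.Vars into the default value.
var CLI struct {
	Threads    int    `env:"LOCALAI_THREADS" default:"4" help:"Number of threads used for parallel computation"`
	ModelsPath string `env:"LOCALAI_MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing"`
	Address    string `env:"LOCALAI_ADDRESS" default:":8080" help:"Bind address for the API server"`
}

func main() {
	kong.Parse(&CLI,
		kong.UsageOnError(),
		kong.Vars{"basepath": kong.ExpandPath(".")},
	)
	fmt.Printf("threads=%d models=%s address=%s\n", CLI.Threads, CLI.ModelsPath, CLI.Address)
}
```

In the real `main.go` above, `kong.Parse` returns a `*kong.Context` whose `Run` method dispatches to the selected subcommand, which is how control is handed to `cli.CLI.Context` at the end of `main`.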
From 0e44a4e66442327fa4bf340322f2f85ca7308500 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 11 Apr 2024 09:19:46 +0200
Subject: [PATCH 0113/2750] :arrow_up: Update docs version mudler/LocalAI
(#1988)
Signed-off-by: GitHub
Co-authored-by: mudler
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index cc0478ca..1b6a2161 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.12.1"
+ "version": "v2.12.3"
}
From e152b07b74cda26f2513fb85755c6b860e7ca65f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 11 Apr 2024 09:22:07 +0200
Subject: [PATCH 0114/2750] :arrow_up: Update ggerganov/llama.cpp (#1991)
Signed-off-by: GitHub
Co-authored-by: mudler
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 337ebc64..e2e4f211 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea
+CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From b4548ad72dc31a00a2a819c3bf540012bf11432a Mon Sep 17 00:00:00 2001
From: Ludovic Leroux
Date: Thu, 11 Apr 2024 03:44:39 -0400
Subject: [PATCH 0115/2750] feat: add flash-attn in nvidia and rocm envs
(#1995)
Signed-off-by: Ludovic LEROUX
---
.../python/common-env/transformers/install.sh | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 8502adde..30ec0de0 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -2,6 +2,7 @@
set -ex
SKIP_CONDA=${SKIP_CONDA:-0}
+REQUIREMENTS_FILE=$1
# Check if environment exist
conda_env_exists(){
@@ -14,7 +15,7 @@ else
export PATH=$PATH:/opt/conda/bin
if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
- conda env create --name transformers --file $1
+ conda env create --name transformers --file $REQUIREMENTS_FILE
echo "Virtual environment created."
else
echo "Virtual environment already exists."
@@ -28,11 +29,16 @@ if [ -d "/opt/intel" ]; then
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
fi
-if [ "$PIP_CACHE_PURGE" = true ] ; then
- if [ $SKIP_CONDA -eq 0 ]; then
- # Activate conda environment
- source activate transformers
- fi
+# If we didn't skip conda, activate the environment
+# to install FlashAttention
+if [ $SKIP_CONDA -eq 0 ]; then
+ source activate transformers
+fi
+if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
+ #TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
+ pip install flash-attn --no-build-isolation
+fi
+if [ "$PIP_CACHE_PURGE" = true ] ; then
pip cache purge
fi
\ No newline at end of file
From c74dec7e387160fa4ab1fcebed94d8d9197fa1c4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 11:47:54 +0200
Subject: [PATCH 0116/2750] Add dependabot.yml
Signed-off-by: Ettore Di Giacinto
---
.github/dependabot.yml | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
create mode 100644 .github/dependabot.yml
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..52abf1db
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,25 @@
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+ - package-ecosystem: "gomod"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ - package-ecosystem: "github-actions"
+ # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+ directory: "/"
+ schedule:
+      # Check for updates to GitHub Actions every week
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    # Python dependency manifests are read from the repository root.
+    directory: "/"
+    schedule:
+      # Check for updates to Python dependencies every week
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    # Dockerfiles are read from the repository root.
+    directory: "/"
+    schedule:
+      # Check for updates to Docker base images every week
+      interval: "weekly"
From 182fef339d801744c39420e10d27e98ee9f965e5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 12:13:06 +0200
Subject: [PATCH 0117/2750] Create dependabot_auto.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/dependabot_auto.yml | 44 +++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100644 .github/workflows/dependabot_auto.yml
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
new file mode 100644
index 00000000..12541d05
--- /dev/null
+++ b/.github/workflows/dependabot_auto.yml
@@ -0,0 +1,44 @@
+name: Dependabot auto-merge
+on:
+ pull_request_target:
+ types: [review_requested]
+
+permissions:
+ contents: write
+ pull-requests: write
+ packages: read
+
+jobs:
+ dependabot:
+ runs-on: ubuntu-latest
+ if: ${{ github.actor == 'dependabot[bot]' }}
+ steps:
+ - name: Dependabot metadata
+ id: metadata
+ uses: dependabot/fetch-metadata@v1.3.4
+ with:
+ github-token: "${{ secrets.GITHUB_TOKEN }}"
+ skip-commit-verification: true
+
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ - name: Approve a PR if not already approved
+ run: |
+ gh pr checkout "$PR_URL"
+ if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
+ then
+ gh pr review --approve "$PR_URL"
+ else
+ echo "PR already approved.";
+ fi
+ env:
+ PR_URL: ${{github.event.pull_request.html_url}}
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+
+ - name: Enable auto-merge for Dependabot PRs
+ if: ${{ contains(github.event.pull_request.title, 'bump')}}
+ run: gh pr merge --auto --merge "$PR_URL"
+ env:
+ PR_URL: ${{github.event.pull_request.html_url}}
+ GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}}
From a49248d29f637c424a29aea28e4ef947cda99b9a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 11:07:45 +0000
Subject: [PATCH 0118/2750] build(deps): bump google.golang.org/protobuf from
1.31.0 to 1.33.0 (#1998)
Bumps google.golang.org/protobuf from 1.31.0 to 1.33.0.
---
updated-dependencies:
- dependency-name: google.golang.org/protobuf
dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 20 ++++++++++++++++++--
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index fac7acfd..d065436c 100644
--- a/go.mod
+++ b/go.mod
@@ -42,7 +42,7 @@ require (
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
google.golang.org/grpc v1.59.0
- google.golang.org/protobuf v1.31.0
+ google.golang.org/protobuf v1.33.0
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
)
diff --git a/go.sum b/go.sum
index dc08c465..8b3a8cc4 100644
--- a/go.sum
+++ b/go.sum
@@ -3,6 +3,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
+github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
@@ -64,6 +66,8 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
@@ -72,6 +76,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
@@ -93,6 +99,10 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
+github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
+github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
@@ -217,12 +227,18 @@ github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdx
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
+github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
+github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0=
+github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks=
+github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0=
github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
@@ -483,8 +499,8 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
-google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
From 079597548650a9665baa82b89e2eeafb66debcd6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 11:44:34 +0000
Subject: [PATCH 0119/2750] build(deps): bump github.com/docker/docker from
20.10.7+incompatible to 24.0.9+incompatible (#1999)
build(deps): bump github.com/docker/docker
Bumps [github.com/docker/docker](https://github.com/docker/docker) from 20.10.7+incompatible to 24.0.9+incompatible.
- [Release notes](https://github.com/docker/docker/releases)
- [Commits](https://github.com/docker/docker/compare/v20.10.7...v24.0.9)
---
updated-dependencies:
- dependency-name: github.com/docker/docker
dependency-type: indirect
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index d065436c..e60c7672 100644
--- a/go.mod
+++ b/go.mod
@@ -75,7 +75,7 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
github.com/docker/cli v20.10.17+incompatible // indirect
- github.com/docker/docker v20.10.7+incompatible // indirect
+ github.com/docker/docker v24.0.9+incompatible // indirect
github.com/docker/go-connections v0.4.0 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
diff --git a/go.sum b/go.sum
index 8b3a8cc4..2141db5f 100644
--- a/go.sum
+++ b/go.sum
@@ -60,8 +60,8 @@ github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0
github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M=
github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
-github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ=
-github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/docker v24.0.9+incompatible h1:HPGzNmwfLZWdxHqK9/II92pyi1EpYKsAqcl4G0Of9v0=
+github.com/docker/docker v24.0.9+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
From fdfd868953a9e40d4e1b9a2eb6d428645572311d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 12:21:52 +0000
Subject: [PATCH 0120/2750] build(deps): bump github.com/gofiber/fiber/v2 from
2.52.0 to 2.52.1 (#2001)
Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.1.
- [Release notes](https://github.com/gofiber/fiber/releases)
- [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.1)
---
updated-dependencies:
- dependency-name: github.com/gofiber/fiber/v2
dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index e60c7672..238e150f 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
github.com/go-audio/wav v1.1.0
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
- github.com/gofiber/fiber/v2 v2.52.0
+ github.com/gofiber/fiber/v2 v2.52.1
github.com/gofiber/swagger v1.0.0
github.com/gofiber/template/html/v2 v2.1.1
github.com/google/uuid v1.5.0
diff --git a/go.sum b/go.sum
index 2141db5f..c66e9b7c 100644
--- a/go.sum
+++ b/go.sum
@@ -110,8 +110,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE=
-github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
+github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI=
+github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg=
github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
From 40781ac013e4fd2574f1faef0bf5a0d491317a34 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 12:48:30 +0000
Subject: [PATCH 0121/2750] build(deps): bump actions/checkout from 3 to 4
(#2002)
Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)
---
updated-dependencies:
- dependency-name: actions/checkout
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/secscan.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
index a5221b40..14958070 100644
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -14,7 +14,7 @@ jobs:
GO111MODULE: on
steps:
- name: Checkout Source
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Run Gosec Security Scanner
uses: securego/gosec@master
with:
From 11a0418510aa2fba956251ec09dee442e906fcb2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 13:10:32 +0000
Subject: [PATCH 0122/2750] build(deps): bump actions/setup-go from 4 to 5
(#2003)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4 to 5.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v4...v5)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/release.yaml | 6 +++---
.github/workflows/test.yml | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 1d749189..269a10c5 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -33,7 +33,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-go@v4
+ - uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
@@ -100,7 +100,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-go@v4
+ - uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
@@ -138,7 +138,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-go@v4
+ - uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 95d10862..28a221bb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -60,7 +60,7 @@ jobs:
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
- uses: actions/setup-go@v4
+ uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false
@@ -177,7 +177,7 @@ jobs:
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
- uses: actions/setup-go@v4
+ uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false
From 821cf0e3fd80a14688a4ebb432d0b9e6cb8a3d31 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 13:58:04 +0000
Subject: [PATCH 0123/2750] build(deps): bump peter-evans/create-pull-request
from 5 to 6 (#2005)
Bumps [peter-evans/create-pull-request](https://github.com/peter-evans/create-pull-request) from 5 to 6.
- [Release notes](https://github.com/peter-evans/create-pull-request/releases)
- [Commits](https://github.com/peter-evans/create-pull-request/compare/v5...v6)
---
updated-dependencies:
- dependency-name: peter-evans/create-pull-request
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/bump_deps.yaml | 2 +-
.github/workflows/bump_docs.yaml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 756398d1..2abb2cab 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -49,7 +49,7 @@ jobs:
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
- uses: peter-evans/create-pull-request@v5
+ uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
diff --git a/.github/workflows/bump_docs.yaml b/.github/workflows/bump_docs.yaml
index 7d52359f..c3ab1698 100644
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -17,7 +17,7 @@ jobs:
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
- uses: peter-evans/create-pull-request@v5
+ uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
From 0a6956b029593dd89ba8dfd8241e01c26d19b887 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 14:35:27 +0000
Subject: [PATCH 0124/2750] build(deps): bump actions/cache from 3 to 4 (#2006)
Bumps [actions/cache](https://github.com/actions/cache) from 3 to 4.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](https://github.com/actions/cache/compare/v3...v4)
---
updated-dependencies:
- dependency-name: actions/cache
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/release.yaml | 2 +-
.github/workflows/test.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 269a10c5..8341a188 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -55,7 +55,7 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
- name: Cache grpc
id: cache-grpc
- uses: actions/cache@v3
+ uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 28a221bb..02093b3f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -92,7 +92,7 @@ jobs:
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
- name: Cache grpc
id: cache-grpc
- uses: actions/cache@v3
+ uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
From b606c7b7680d5592e29228daa133d88b16fbae19 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 14:44:02 +0000
Subject: [PATCH 0125/2750] build(deps): bump actions/upload-artifact from 3 to
4 (#2007)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v3...v4)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/release.yaml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 8341a188..8198fb3d 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -82,7 +82,7 @@ jobs:
else
STATIC=true make dist
fi
- - uses: actions/upload-artifact@v3
+ - uses: actions/upload-artifact@v4
with:
name: ${{ matrix.build }}
path: release/
@@ -111,7 +111,7 @@ jobs:
run: |
make backend-assets/grpc/stablediffusion
mkdir -p release && cp backend-assets/grpc/stablediffusion release
- - uses: actions/upload-artifact@v3
+ - uses: actions/upload-artifact@v4
with:
name: stablediffusion
path: release/
@@ -154,7 +154,7 @@ jobs:
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
make dist
- - uses: actions/upload-artifact@v3
+ - uses: actions/upload-artifact@v4
with:
name: ${{ matrix.build }}
path: release/
From fce606fc0f2a116b25dc51fa51118a6642ba34b3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 15:41:58 +0000
Subject: [PATCH 0126/2750] build(deps): bump github.com/charmbracelet/glamour
from 0.6.0 to 0.7.0 (#2004)
Bumps [github.com/charmbracelet/glamour](https://github.com/charmbracelet/glamour) from 0.6.0 to 0.7.0.
- [Release notes](https://github.com/charmbracelet/glamour/releases)
- [Commits](https://github.com/charmbracelet/glamour/compare/v0.6.0...v0.7.0)
---
updated-dependencies:
- dependency-name: github.com/charmbracelet/glamour
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 12 ++++++------
go.sum | 30 +++++++++++++-----------------
2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/go.mod b/go.mod
index 238e150f..081c25d6 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,7 @@ go 1.21
require (
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
github.com/Masterminds/sprig/v3 v3.2.3
- github.com/charmbracelet/glamour v0.6.0
+ github.com/charmbracelet/glamour v0.7.0
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
github.com/fsnotify/fsnotify v1.7.0
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e
@@ -65,8 +65,8 @@ require (
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
- github.com/alecthomas/chroma v0.10.0 // indirect
- github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
+ github.com/alecthomas/chroma/v2 v2.8.0 // indirect
+ github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
@@ -104,7 +104,7 @@ require (
github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect
github.com/muesli/reflow v0.3.0 // indirect
- github.com/muesli/termenv v0.13.0 // indirect
+ github.com/muesli/termenv v0.15.2 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
@@ -126,8 +126,8 @@ require (
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
- github.com/yuin/goldmark v1.5.2 // indirect
- github.com/yuin/goldmark-emoji v1.0.1 // indirect
+ github.com/yuin/goldmark v1.5.4 // indirect
+ github.com/yuin/goldmark-emoji v1.0.2 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
golang.org/x/crypto v0.21.0 // indirect
diff --git a/go.sum b/go.sum
index c66e9b7c..359bc836 100644
--- a/go.sum
+++ b/go.sum
@@ -17,8 +17,8 @@ github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEV
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
-github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
-github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
+github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
+github.com/alecthomas/chroma/v2 v2.8.0/go.mod h1:yrkMI9807G1ROx13fhe1v6PN2DDeaR73L3d+1nmYQtw=
github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA=
github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
@@ -26,8 +26,8 @@ github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
-github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg=
-github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
+github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
+github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -36,8 +36,8 @@ github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8
github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc=
-github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc=
+github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
+github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
@@ -55,7 +55,6 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0=
github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M=
@@ -212,7 +211,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zk
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
-github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM=
github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58=
github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
@@ -235,8 +233,8 @@ github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
-github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0=
-github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
+github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
+github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
@@ -330,7 +328,6 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
@@ -371,11 +368,12 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU=
-github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os=
-github.com/yuin/goldmark-emoji v1.0.1/go.mod h1:2w1E6FEWLcDQkoTE+7HU6QF1F6SLlNGjRIBbIZQFqkQ=
+github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU=
+github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s=
+github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY=
github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw=
github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
@@ -413,7 +411,6 @@ golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
@@ -450,7 +447,6 @@ golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
From b1a242251ca252127f830ae20de9d55d40c21e4c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 18:26:03 +0200
Subject: [PATCH 0127/2750] ci: fixup upload artifact name
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/release.yaml | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 8198fb3d..a69a2b05 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -84,7 +84,7 @@ jobs:
fi
- uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.build }}
+ name: LocalAI-linux-${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
@@ -115,12 +115,6 @@ jobs:
with:
name: stablediffusion
path: release/
- - name: Release
- uses: softprops/action-gh-release@v1
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
build-macOS:
strategy:
@@ -156,7 +150,7 @@ jobs:
make dist
- uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.build }}
+ name: LocalAI-MacOS-${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
From cbda06fb96661e7c9386ccca1c6dcaf652083a70 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:52:54 +0000
Subject: [PATCH 0128/2750] build(deps): bump github.com/gofiber/fiber/v2 from
2.52.0 to 2.52.4 (#2008)
Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.4.
- [Release notes](https://github.com/gofiber/fiber/releases)
- [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.4)
---
updated-dependencies:
- dependency-name: github.com/gofiber/fiber/v2
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index 081c25d6..298f2d69 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
github.com/go-audio/wav v1.1.0
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
- github.com/gofiber/fiber/v2 v2.52.1
+ github.com/gofiber/fiber/v2 v2.52.4
github.com/gofiber/swagger v1.0.0
github.com/gofiber/template/html/v2 v2.1.1
github.com/google/uuid v1.5.0
diff --git a/go.sum b/go.sum
index 359bc836..551dd922 100644
--- a/go.sum
+++ b/go.sum
@@ -109,8 +109,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI=
-github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
+github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
+github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg=
github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
From 12c0d9443ecfa7367e041b900a243e0c77726dce Mon Sep 17 00:00:00 2001
From: Ludovic Leroux
Date: Thu, 11 Apr 2024 13:20:22 -0400
Subject: [PATCH 0129/2750] feat: use tokenizer.apply_chat_template() in vLLM
(#1990)
Use tokenizer.apply_chat_template() in vLLM
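For context, a minimal sketch of what the template call does on the backend side (illustrative only, not code from this patch; the model name is a placeholder):

    # Render OpenAI-style messages with the model's own chat template.
    # Assumes a HuggingFace tokenizer that ships a chat template.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("some-org/some-chat-model")
    messages = [{"role": "user", "content": "What is LocalAI?"}]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,              # return the rendered prompt string
        add_generation_prompt=True,  # append the assistant turn marker
    )

The prompt is thus formatted the way the model was trained to expect, rather than through a hand-maintained per-model template.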
Signed-off-by: Ludovic LEROUX
---
backend/backend.proto | 7 +
backend/python/autogptq/backend_pb2.py | 98 ++-
backend/python/autogptq/backend_pb2_grpc.py | 132 +++
backend/python/bark/backend_pb2.py | 98 ++-
backend/python/bark/backend_pb2_grpc.py | 132 +++
backend/python/coqui/backend_pb2.py | 98 ++-
backend/python/coqui/backend_pb2_grpc.py | 132 +++
backend/python/diffusers/backend_pb2.py | 98 ++-
backend/python/diffusers/backend_pb2_grpc.py | 132 +++
backend/python/exllama/backend_pb2.py | 98 ++-
backend/python/exllama/backend_pb2_grpc.py | 132 +++
backend/python/exllama2/backend_pb2.py | 98 ++-
backend/python/exllama2/backend_pb2_grpc.py | 132 +++
backend/python/mamba/backend_pb2.py | 98 ++-
backend/python/mamba/backend_pb2_grpc.py | 132 +++
backend/python/petals/backend_pb2.py | 98 ++-
backend/python/petals/backend_pb2_grpc.py | 132 +++
.../sentencetransformers/backend_pb2.py | 98 ++-
.../sentencetransformers/backend_pb2_grpc.py | 132 +++
.../transformers-musicgen/backend_pb2.py | 98 ++-
.../transformers-musicgen/backend_pb2_grpc.py | 132 +++
backend/python/transformers/backend_pb2.py | 98 ++-
.../python/transformers/backend_pb2_grpc.py | 132 +++
backend/python/vall-e-x/backend_pb2.py | 98 ++-
backend/python/vall-e-x/backend_pb2_grpc.py | 132 +++
backend/python/vllm/backend_pb2.py | 98 ++-
backend/python/vllm/backend_pb2_grpc.py | 132 +++
backend/python/vllm/backend_vllm.py | 23 +-
core/backend/llm.go | 25 +-
core/config/backend_config.go | 11 +-
core/http/endpoints/openai/chat.go | 256 +++---
core/http/endpoints/openai/inference.go | 2 +-
pkg/grpc/proto/backend.pb.go | 761 ++++++++++--------
pkg/grpc/proto/backend_grpc.pb.go | 2 +-
34 files changed, 3088 insertions(+), 989 deletions(-)
diff --git a/backend/backend.proto b/backend/backend.proto
index c3d3180b..56d919ef 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -107,6 +107,8 @@ message PredictOptions {
string NegativePrompt = 40;
int32 NDraft = 41;
repeated string Images = 42;
+ bool UseTokenizerTemplate = 43;
+ repeated Message Messages = 44;
}
// The response message containing the result
@@ -256,3 +258,8 @@ message StatusResponse {
State state = 1;
MemoryUsageData memory = 2;
}
+
+message Message {
+ string role = 1;
+ string content = 2;
+}
\ No newline at end of file
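As a rough usage sketch (not part of the patch; the message values are hypothetical), the new proto fields map onto the generated Python bindings like so:

    # Populate the new PredictOptions fields from the regenerated backend_pb2.
    import backend_pb2

    opts = backend_pb2.PredictOptions(
        UseTokenizerTemplate=True,
        Messages=[
            backend_pb2.Message(role="system", content="You are a helpful assistant."),
            backend_pb2.Message(role="user", content="Hello!"),
        ],
    )

A backend that honors UseTokenizerTemplate can then feed opts.Messages to the tokenizer's chat template instead of a pre-rendered Prompt string.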
diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/autogptq/backend_pb2.py
+++ b/backend/python/autogptq/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/autogptq/backend_pb2_grpc.py
+++ b/backend/python/autogptq/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/bark/backend_pb2.py
+++ b/backend/python/bark/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/bark/backend_pb2_grpc.py
+++ b/backend/python/bark/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/coqui/backend_pb2.py
+++ b/backend/python/coqui/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/coqui/backend_pb2_grpc.py
+++ b/backend/python/coqui/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/diffusers/backend_pb2.py
+++ b/backend/python/diffusers/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/diffusers/backend_pb2_grpc.py
+++ b/backend/python/diffusers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/exllama/backend_pb2.py
+++ b/backend/python/exllama/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama/backend_pb2_grpc.py
+++ b/backend/python/exllama/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
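
The regenerated stubs above wire four new unary-unary RPCs (StoresSet, StoresDelete, StoresGet, StoresFind) into every Python backend, matching the Stores* messages now carried by backend.proto. A minimal client sketch follows, assuming a backend listening on localhost:50051; the address and the vector/payload values are illustrative, not part of the patch:

import grpc

import backend_pb2
import backend_pb2_grpc


def store_and_find(address="localhost:50051"):
    # Plaintext channel to a hypothetical local backend; the regenerated stub
    # exposes the new Stores* methods alongside the existing ones.
    with grpc.insecure_channel(address) as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])   # repeated float
        value = backend_pb2.StoresValue(Bytes=b"payload")     # opaque bytes
        # Store one key/value pair, then ask for the single closest match.
        stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[value]))
        found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=1))
        return list(zip(found.Keys, found.Values, found.Similarities))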
diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/exllama2/backend_pb2.py
+++ b/backend/python/exllama2/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
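
The regenerated descriptor serializes the seven Stores* messages ahead of HealthMessage, which is why every _serialized_start/_serialized_end offset in the hunk above shifts, and it adds a Message type (role/content) backing the new PredictOptions.Messages field. A short sketch of the shapes the rebuilt module exposes; the field names are read off the descriptor, the values are examples only:

import backend_pb2

key = backend_pb2.StoresKey(Floats=[0.25, -0.5])             # repeated float vector
value = backend_pb2.StoresValue(Bytes=b"document-0")         # opaque bytes payload
set_opts = backend_pb2.StoresSetOptions(Keys=[key], Values=[value])
find_opts = backend_pb2.StoresFindOptions(Key=key, TopK=5)   # single key, int32 TopK
assert find_opts.TopK == 5 and len(set_opts.Keys) == 1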
diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama2/backend_pb2_grpc.py
+++ b/backend/python/exllama2/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
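
On the server side, a backend opts into the store RPCs by overriding the new BackendServicer methods, which otherwise answer UNIMPLEMENTED as generated above. A minimal in-memory sketch with a hypothetical servicer, assuming exact-match lookups keyed on the float tuple; the actual similarity search is left to whichever backend implements StoresFind:

import backend_pb2
import backend_pb2_grpc


class InMemoryStoreServicer(backend_pb2_grpc.BackendServicer):
    """Hypothetical servicer: keeps key/value pairs in a process-local dict."""

    def __init__(self):
        self._store = {}  # tuple(Floats) -> bytes

    def StoresSet(self, request, context):
        # Pair up the parallel Keys/Values lists from StoresSetOptions.
        for k, v in zip(request.Keys, request.Values):
            self._store[tuple(k.Floats)] = v.Bytes
        return backend_pb2.Result(success=True, message="stored")

    def StoresGet(self, request, context):
        # Return only the keys that are present, with their stored bytes.
        keys, values = [], []
        for k in request.Keys:
            data = self._store.get(tuple(k.Floats))
            if data is not None:
                keys.append(k)
                values.append(backend_pb2.StoresValue(Bytes=data))
        return backend_pb2.StoresGetResult(Keys=keys, Values=values)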
diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/mamba/backend_pb2.py
+++ b/backend/python/mamba/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
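
Registering such a servicer goes through the regenerated add_BackendServicer_to_server helper, whose handler table now routes the four store methods as well. A sketch, with the port and worker count chosen for illustration:

from concurrent import futures

import grpc
import backend_pb2_grpc


def serve(servicer, port=50051):
    # Thread-pool server; the helper binds all Backend handlers, Stores* included.
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(servicer, server)
    server.add_insecure_port(f"[::]:{port}")  # plaintext; local use only
    server.start()
    server.wait_for_termination()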
diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/mamba/backend_pb2_grpc.py
+++ b/backend/python/mamba/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
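
Each grpc module also appends static convenience wrappers on the Backend class, built on grpc.experimental.unary_unary, so a one-off call needs no explicit channel or stub. A sketch for StoresDelete; the target is illustrative, and the experimental API may warn or change between grpcio releases:

import backend_pb2
from backend_pb2_grpc import Backend

result = Backend.StoresDelete(
    backend_pb2.StoresDeleteOptions(Keys=[backend_pb2.StoresKey(Floats=[0.1])]),
    target="localhost:50051",
    insecure=True,  # needed here because no channel_credentials are supplied
)
print(result.success, result.message)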
diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/petals/backend_pb2.py
+++ b/backend/python/petals/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/petals/backend_pb2_grpc.py
+++ b/backend/python/petals/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
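
Note (not part of the patch): the regenerated stubs above add four key/value store RPCs to the Backend service. As a minimal, illustrative client sketch, assuming a backend that actually implements them, the address and payloads below are made up while the message and stub names come from the generated code above:

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    # Placeholder address; keys are float vectors, values are opaque bytes.
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)

        key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
        value = backend_pb2.StoresValue(Bytes=b"payload")

        # Store one key/value pair.
        stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[value]))

        # Read it back by exact key.
        got = stub.StoresGet(backend_pb2.StoresGetOptions(Keys=[key]))

        # Similarity search: the TopK stored keys closest to Key,
        # returned together with their values and similarity scores.
        found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=1))
        print(found.Similarities)
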
diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/sentencetransformers/backend_pb2.py
+++ b/backend/python/sentencetransformers/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/sentencetransformers/backend_pb2_grpc.py
+++ b/backend/python/sentencetransformers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
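
Note (not part of the patch): on the server side, the generated BackendServicer defaults above raise UNIMPLEMENTED for each Stores method, so a backend opts in by overriding them. A minimal in-memory sketch, using only the generated types and the add_BackendServicer_to_server helper from the code above; the class name and storage scheme are hypothetical:

    import grpc
    from concurrent import futures

    import backend_pb2
    import backend_pb2_grpc

    class InMemoryStoreServicer(backend_pb2_grpc.BackendServicer):
        """Hypothetical backend that keeps the store in a Python dict."""

        def __init__(self):
            self._store = {}  # tuple of key floats -> value bytes

        def StoresSet(self, request, context):
            for key, value in zip(request.Keys, request.Values):
                self._store[tuple(key.Floats)] = value.Bytes
            return backend_pb2.Result(success=True)

        def StoresDelete(self, request, context):
            for key in request.Keys:
                self._store.pop(tuple(key.Floats), None)
            return backend_pb2.Result(success=True)

        def StoresGet(self, request, context):
            keys, values = [], []
            for key in request.Keys:
                data = self._store.get(tuple(key.Floats))
                if data is not None:
                    keys.append(key)
                    values.append(backend_pb2.StoresValue(Bytes=data))
            return backend_pb2.StoresGetResult(Keys=keys, Values=values)

    # Wire the servicer into a gRPC server via the generated helper.
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(InMemoryStoreServicer(), server)
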
diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/transformers-musicgen/backend_pb2.py
+++ b/backend/python/transformers-musicgen/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
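
The regenerated descriptor blob above is opaque, so for readability here is the schema it encodes, decoded directly from the serialized bytes (the authoritative definition lives in the repository's backend.proto; field numbers and types below are read off the blob, nothing else is added):

    // excerpt, proto3 — new vector-store messages
    message StoresKey {
      repeated float Floats = 1;
    }
    message StoresValue {
      bytes Bytes = 1;
    }
    message StoresSetOptions {
      repeated StoresKey Keys = 1;
      repeated StoresValue Values = 2;
    }
    message StoresDeleteOptions {
      repeated StoresKey Keys = 1;
    }
    message StoresGetOptions {
      repeated StoresKey Keys = 1;
    }
    message StoresGetResult {
      repeated StoresKey Keys = 1;
      repeated StoresValue Values = 2;
    }
    message StoresFindOptions {
      StoresKey Key = 1;
      int32 TopK = 2;
    }
    message StoresFindResult {
      repeated StoresKey Keys = 1;
      repeated StoresValue Values = 2;
      repeated float Similarities = 3;
    }
    // new chat-message type
    message Message {
      string role = 1;
      string content = 2;
    }
    // PredictOptions additionally gains:
    //   bool UseTokenizerTemplate = 43;
    //   repeated Message Messages = 44;
    // and service Backend gains four unary RPCs:
    //   rpc StoresSet(StoresSetOptions) returns (Result) {}
    //   rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
    //   rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
    //   rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
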
diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/transformers-musicgen/backend_pb2_grpc.py
+++ b/backend/python/transformers-musicgen/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
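
Each of the four additions is a plain unary-unary RPC, so the generated stub can be exercised directly. A minimal client sketch against these generated modules, assuming a backend listening on localhost:8080 (the address and the sample vector/payload are illustrative assumptions, not part of the patch):

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    # Connect to a running backend (address is an assumption).
    channel = grpc.insecure_channel("localhost:8080")
    stub = backend_pb2_grpc.BackendStub(channel)

    # Store one vector -> payload pair.
    key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
    val = backend_pb2.StoresValue(Bytes=b"payload")
    res = stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[val]))
    print(res.success, res.message)

    # Find the stored keys most similar to a query vector.
    found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=5))
    for k, v, sim in zip(found.Keys, found.Values, found.Similarities):
        print(list(k.Floats), v.Bytes, sim)
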
diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/transformers/backend_pb2.py
+++ b/backend/python/transformers/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/transformers/backend_pb2_grpc.py
+++ b/backend/python/transformers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
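
On the servicer side, the generated base class answers the new RPCs with UNIMPLEMENTED, so a backend opts in by overriding them. A minimal in-memory sketch, assuming exact-key lookup (the dict backing, servicer name, and port are illustrative assumptions; LocalAI's actual store is implemented elsewhere, and StoresFind is omitted here for brevity):

    from concurrent import futures

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    class InMemoryStoreServicer(backend_pb2_grpc.BackendServicer):
        """Overrides only the new Stores RPCs; all others stay UNIMPLEMENTED."""

        def __init__(self):
            self._store = {}  # tuple of key floats -> bytes

        def StoresSet(self, request, context):
            for k, v in zip(request.Keys, request.Values):
                self._store[tuple(k.Floats)] = v.Bytes
            return backend_pb2.Result(success=True, message="stored")

        def StoresDelete(self, request, context):
            for k in request.Keys:
                self._store.pop(tuple(k.Floats), None)
            return backend_pb2.Result(success=True, message="deleted")

        def StoresGet(self, request, context):
            reply = backend_pb2.StoresGetResult()
            for k in request.Keys:
                data = self._store.get(tuple(k.Floats))
                if data is not None:
                    reply.Keys.append(k)  # append copies the message
                    reply.Values.append(backend_pb2.StoresValue(Bytes=data))
            return reply

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(InMemoryStoreServicer(), server)
    server.add_insecure_port("[::]:8080")  # port is an assumption
    server.start()
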
diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/vall-e-x/backend_pb2.py
+++ b/backend/python/vall-e-x/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
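Note on the hunk above: backend_pb2.py is protoc-generated, so the _serialized_start/_serialized_end offsets shift mechanically once the new Stores* messages are prepended to backend.proto; nothing here is hand-edited. A minimal regeneration sketch with grpc_tools, assuming backend.proto lives under pkg/grpc/proto (the include path and output directory are assumptions, not the repository's canonical invocation):

    # Sketch: regenerate one Python backend's stubs after changing backend.proto.
    from grpc_tools import protoc

    protoc.main([
        "protoc",
        "-Ipkg/grpc/proto",                          # assumed location of backend.proto
        "--python_out=backend/python/vall-e-x",      # assumed output directory
        "--grpc_python_out=backend/python/vall-e-x",
        "backend.proto",
    ])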
diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/vall-e-x/backend_pb2_grpc.py
+++ b/backend/python/vall-e-x/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
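For reference, the four new RPCs wired up above can be exercised from Python once a backend actually implements them; the generated servicer methods above only answer UNIMPLEMENTED. A minimal client sketch, assuming a backend gRPC server listening on localhost:50051 (the address and payloads are made up):

    # Sketch: calling the new Stores RPCs through the regenerated stubs.
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    channel = grpc.insecure_channel("localhost:50051")  # assumed address
    stub = backend_pb2_grpc.BackendStub(channel)

    key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
    value = backend_pb2.StoresValue(Bytes=b"some payload")

    # Upsert a key/value pair, then look up the TopK closest entries.
    stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[value]))
    found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=2))
    for k, v, sim in zip(found.Keys, found.Values, found.Similarities):
        print(k.Floats, v.Bytes, sim)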
diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py
index 08b896c7..24b6de3b 100644
--- a/backend/python/vllm/backend_pb2.py
+++ b/backend/python/vllm/backend_pb2.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
+# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! 
\x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 
\x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
-
- DESCRIPTOR._options = None
- DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
- _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
- _globals['_HEALTHMESSAGE']._serialized_start=26
- _globals['_HEALTHMESSAGE']._serialized_end=41
- _globals['_PREDICTOPTIONS']._serialized_start=44
- _globals['_PREDICTOPTIONS']._serialized_end=850
- _globals['_REPLY']._serialized_start=852
- _globals['_REPLY']._serialized_end=876
- _globals['_MODELOPTIONS']._serialized_start=879
- _globals['_MODELOPTIONS']._serialized_end=1951
- _globals['_RESULT']._serialized_start=1953
- _globals['_RESULT']._serialized_end=1995
- _globals['_EMBEDDINGRESULT']._serialized_start=1997
- _globals['_EMBEDDINGRESULT']._serialized_end=2034
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
- _globals['_TRANSCRIPTRESULT']._serialized_start=2105
- _globals['_TRANSCRIPTRESULT']._serialized_end=2183
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
- _globals['_TTSREQUEST']._serialized_start=2494
- _globals['_TTSREQUEST']._serialized_end=2563
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
- _globals['_MEMORYUSAGEDATA']._serialized_start=2622
- _globals['_MEMORYUSAGEDATA']._serialized_end=2764
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
- _globals['_STATUSRESPONSE']._serialized_start=2767
- _globals['_STATUSRESPONSE']._serialized_end=2940
- _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
- _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
- _globals['_BACKEND']._serialized_start=2943
- _globals['_BACKEND']._serialized_end=3571
+ _globals['DESCRIPTOR']._options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+ _globals['_STORESKEY']._serialized_start=26
+ _globals['_STORESKEY']._serialized_end=53
+ _globals['_STORESVALUE']._serialized_start=55
+ _globals['_STORESVALUE']._serialized_end=83
+ _globals['_STORESSETOPTIONS']._serialized_start=85
+ _globals['_STORESSETOPTIONS']._serialized_end=175
+ _globals['_STORESDELETEOPTIONS']._serialized_start=177
+ _globals['_STORESDELETEOPTIONS']._serialized_end=232
+ _globals['_STORESGETOPTIONS']._serialized_start=234
+ _globals['_STORESGETOPTIONS']._serialized_end=286
+ _globals['_STORESGETRESULT']._serialized_start=288
+ _globals['_STORESGETRESULT']._serialized_end=377
+ _globals['_STORESFINDOPTIONS']._serialized_start=379
+ _globals['_STORESFINDOPTIONS']._serialized_end=445
+ _globals['_STORESFINDRESULT']._serialized_start=447
+ _globals['_STORESFINDRESULT']._serialized_end=559
+ _globals['_HEALTHMESSAGE']._serialized_start=561
+ _globals['_HEALTHMESSAGE']._serialized_end=576
+ _globals['_PREDICTOPTIONS']._serialized_start=579
+ _globals['_PREDICTOPTIONS']._serialized_end=1451
+ _globals['_REPLY']._serialized_start=1453
+ _globals['_REPLY']._serialized_end=1477
+ _globals['_MODELOPTIONS']._serialized_start=1480
+ _globals['_MODELOPTIONS']._serialized_end=2552
+ _globals['_RESULT']._serialized_start=2554
+ _globals['_RESULT']._serialized_end=2596
+ _globals['_EMBEDDINGRESULT']._serialized_start=2598
+ _globals['_EMBEDDINGRESULT']._serialized_end=2635
+ _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+ _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+ _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+ _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+ _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+ _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+ _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+ _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+ _globals['_TTSREQUEST']._serialized_start=3095
+ _globals['_TTSREQUEST']._serialized_end=3164
+ _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+ _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+ _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+ _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+ _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+ _globals['_STATUSRESPONSE']._serialized_start=3368
+ _globals['_STATUSRESPONSE']._serialized_end=3541
+ _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+ _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+ _globals['_MESSAGE']._serialized_start=3543
+ _globals['_MESSAGE']._serialized_end=3583
+ _globals['_BACKEND']._serialized_start=3586
+ _globals['_BACKEND']._serialized_end=4477
# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/vllm/backend_pb2_grpc.py
+++ b/backend/python/vllm/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
+ self.StoresSet = channel.unary_unary(
+ '/backend.Backend/StoresSet',
+ request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresDelete = channel.unary_unary(
+ '/backend.Backend/StoresDelete',
+ request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+ response_deserializer=backend__pb2.Result.FromString,
+ )
+ self.StoresGet = channel.unary_unary(
+ '/backend.Backend/StoresGet',
+ request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresGetResult.FromString,
+ )
+ self.StoresFind = channel.unary_unary(
+ '/backend.Backend/StoresFind',
+ request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+ response_deserializer=backend__pb2.StoresFindResult.FromString,
+ )
class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
+ def StoresSet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresDelete(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresGet(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
+ def StoresFind(self, request, context):
+ """Missing associated documentation comment in .proto file."""
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+ context.set_details('Method not implemented!')
+ raise NotImplementedError('Method not implemented!')
+
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
+ 'StoresSet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresSet,
+ request_deserializer=backend__pb2.StoresSetOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresDelete': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresDelete,
+ request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+ response_serializer=backend__pb2.Result.SerializeToString,
+ ),
+ 'StoresGet': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresGet,
+ request_deserializer=backend__pb2.StoresGetOptions.FromString,
+ response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+ ),
+ 'StoresFind': grpc.unary_unary_rpc_method_handler(
+ servicer.StoresFind,
+ request_deserializer=backend__pb2.StoresFindOptions.FromString,
+ response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+ ),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresSet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+ backend__pb2.StoresSetOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresDelete(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+ backend__pb2.StoresDeleteOptions.SerializeToString,
+ backend__pb2.Result.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresGet(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+ backend__pb2.StoresGetOptions.SerializeToString,
+ backend__pb2.StoresGetResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+ @staticmethod
+ def StoresFind(request,
+ target,
+ options=(),
+ channel_credentials=None,
+ call_credentials=None,
+ insecure=False,
+ compression=None,
+ wait_for_ready=None,
+ timeout=None,
+ metadata=None):
+ return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
+ backend__pb2.StoresFindOptions.SerializeToString,
+ backend__pb2.StoresFindResult.FromString,
+ options, channel_credentials,
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py
index ef5134b8..ff0f0b26 100644
--- a/backend/python/vllm/backend_vllm.py
+++ b/backend/python/vllm/backend_vllm.py
@@ -14,6 +14,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
+from vllm.transformers_utils.tokenizer import get_tokenizer
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -71,7 +72,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
- def LoadModel(self, request, context):
+ async def LoadModel(self, request, context):
"""
Loads a language model.
@@ -103,6 +104,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+ try:
+ engine_model_config = await self.llm.get_model_config()
+ self.tokenizer = get_tokenizer(
+ engine_model_config.tokenizer,
+ tokenizer_mode=engine_model_config.tokenizer_mode,
+ trust_remote_code=engine_model_config.trust_remote_code,
+ truncation_side="left",
+ )
+ except Exception as err:
+ return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
return backend_pb2.Result(message="Model loaded successfully", success=True)
async def Predict(self, request, context):
@@ -161,9 +174,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Seed != 0:
sampling_params.seed = request.Seed
+ prompt = request.Prompt
+
+        # If the tokenizer template is enabled and messages are provided instead of a prompt, apply the tokenizer's chat template
+ if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
+ prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
+
# Generate text
request_id = random_uuid()
- outputs = self.llm.generate(request.Prompt, sampling_params, request_id)
+ outputs = self.llm.generate(prompt, sampling_params, request_id)
# Stream the results
generated_text = ""
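The apply_chat_template call introduced above is the standard Hugging Face tokenizer API: it renders a list of role/content messages through the model's own chat template rather than a LocalAI prompt template. A standalone sketch of the same flow (the model name is only an example, and plain dicts stand in for the request's proto messages):

    # Sketch: flattening chat messages into a prompt via the tokenizer's template.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    messages = [{"role": "user", "content": "What is an apple?"}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)  # the templated string that gets handed to llm.generate(...)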
diff --git a/core/backend/llm.go b/core/backend/llm.go
index d5e14df0..493dc25c 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -2,6 +2,7 @@ package backend
import (
"context"
+ "fmt"
"os"
"regexp"
"strings"
@@ -9,9 +10,11 @@ import (
"unicode/utf8"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
@@ -26,7 +29,7 @@ type TokenUsage struct {
Completion int
}
-func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
threads := c.Threads
if *threads == 0 && o.Threads != 0 {
@@ -71,10 +74,30 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
return nil, err
}
+ var protoMessages []*proto.Message
+ // if we are using the tokenizer template, we need to convert the messages to proto messages
+ // unless the prompt has already been tokenized (non-chat endpoints + functions)
+ if c.TemplateConfig.UseTokenizerTemplate && s == "" {
+ protoMessages = make([]*proto.Message, len(messages), len(messages))
+ for i, message := range messages {
+ protoMessages[i] = &proto.Message{
+ Role: message.Role,
+ }
+ switch ct := message.Content.(type) {
+ case string:
+ protoMessages[i].Content = ct
+ default:
+ return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
+ }
+ }
+ }
+
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts.Prompt = s
+ opts.Messages = protoMessages
+ opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
opts.Images = images
tokenUsage := TokenUsage{}
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index a90b1c1b..81c92d01 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -165,11 +165,12 @@ type Functions struct {
}
type TemplateConfig struct {
- Chat string `yaml:"chat"`
- ChatMessage string `yaml:"chat_message"`
- Completion string `yaml:"completion"`
- Edit string `yaml:"edit"`
- Functions string `yaml:"function"`
+ Chat string `yaml:"chat"`
+ ChatMessage string `yaml:"chat_message"`
+ Completion string `yaml:"completion"`
+ Edit string `yaml:"edit"`
+ Functions string `yaml:"function"`
+ UseTokenizerTemplate bool `yaml:"use_tokenizer_template"`
}
func (c *BackendConfig) SetFunctionCallString(s string) {
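With the new field, a model definition can opt into the backend tokenizer's chat template from its YAML; the nesting under "template" follows the TemplateConfig struct, though the exact file layout is an assumption. An end-to-end sketch against the OpenAI-compatible endpoint, assuming LocalAI listens on localhost:8080 and a model named "mistral" is configured as in the comment below:

    # Sketch: chat request to a model that delegates templating to its tokenizer.
    # Assumed model YAML (e.g. models/mistral.yaml):
    #   name: mistral
    #   backend: vllm
    #   template:
    #     use_tokenizer_template: true
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",  # assumed address
        json={
            "model": "mistral",
            "messages": [{"role": "user", "content": "Hello!"}],
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])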
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 871ae6c1..36d1142b 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -230,112 +230,154 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
var predInput string
- suppressConfigSystemPrompt := false
- mess := []string{}
- for messageIndex, i := range input.Messages {
- var content string
- role := i.Role
+ // If we are using the tokenizer template, we don't need to process the messages
+ // unless we are processing functions
+ if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
- // if function call, we might want to customize the role so we can display better that the "assistant called a json action"
- // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
- if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
- roleFn := "assistant_function_call"
- r := config.Roles[roleFn]
- if r != "" {
- role = roleFn
- }
- }
- r := config.Roles[role]
- contentExists := i.Content != nil && i.StringContent != ""
+ suppressConfigSystemPrompt := false
+ mess := []string{}
+ for messageIndex, i := range input.Messages {
+ var content string
+ role := i.Role
- fcall := i.FunctionCall
- if len(i.ToolCalls) > 0 {
- fcall = i.ToolCalls
- }
-
- // First attempt to populate content via a chat message specific template
- if config.TemplateConfig.ChatMessage != "" {
- chatMessageData := model.ChatMessageTemplateData{
- SystemPrompt: config.SystemPrompt,
- Role: r,
- RoleName: role,
- Content: i.StringContent,
- FunctionCall: fcall,
- FunctionName: i.Name,
- LastMessage: messageIndex == (len(input.Messages) - 1),
- Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
- MessageIndex: messageIndex,
- }
- templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
- if err != nil {
- log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
- } else {
- if templatedChatMessage == "" {
- log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
- continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
+ // if function call, we might want to customize the role so we can display better that the "assistant called a json action"
+ // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
+ if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
+ roleFn := "assistant_function_call"
+ r := config.Roles[roleFn]
+ if r != "" {
+ role = roleFn
}
- log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
- content = templatedChatMessage
}
- }
+ r := config.Roles[role]
+ contentExists := i.Content != nil && i.StringContent != ""
- marshalAnyRole := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + fmt.Sprint(r, " ", string(j))
+ fcall := i.FunctionCall
+ if len(i.ToolCalls) > 0 {
+ fcall = i.ToolCalls
+ }
+
+ // First attempt to populate content via a chat message specific template
+ if config.TemplateConfig.ChatMessage != "" {
+ chatMessageData := model.ChatMessageTemplateData{
+ SystemPrompt: config.SystemPrompt,
+ Role: r,
+ RoleName: role,
+ Content: i.StringContent,
+ FunctionCall: fcall,
+ FunctionName: i.Name,
+ LastMessage: messageIndex == (len(input.Messages) - 1),
+ Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
+ MessageIndex: messageIndex,
+ }
+ templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
+ if err != nil {
+ log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
- content = fmt.Sprint(r, " ", string(j))
+ if templatedChatMessage == "" {
+ log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
+ continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
+ }
+ log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
+ content = templatedChatMessage
}
}
- }
- marshalAny := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + string(j)
+
+ marshalAnyRole := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + fmt.Sprint(r, " ", string(j))
+ } else {
+ content = fmt.Sprint(r, " ", string(j))
+ }
+ }
+ }
+ marshalAny := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + string(j)
+ } else {
+ content = string(j)
+ }
+ }
+ }
+ // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
+ if content == "" {
+ if r != "" {
+ if contentExists {
+ content = fmt.Sprint(r, i.StringContent)
+ }
+
+ if i.FunctionCall != nil {
+ marshalAnyRole(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAnyRole(i.ToolCalls)
+ }
} else {
- content = string(j)
+ if contentExists {
+ content = fmt.Sprint(i.StringContent)
+ }
+ if i.FunctionCall != nil {
+ marshalAny(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAny(i.ToolCalls)
+ }
+ }
+					// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+ if contentExists && role == "system" {
+ suppressConfigSystemPrompt = true
}
}
- }
- // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
- if content == "" {
- if r != "" {
- if contentExists {
- content = fmt.Sprint(r, i.StringContent)
- }
- if i.FunctionCall != nil {
- marshalAnyRole(i.FunctionCall)
- }
- if i.ToolCalls != nil {
- marshalAnyRole(i.ToolCalls)
- }
+ mess = append(mess, content)
+ }
+
+ predInput = strings.Join(mess, "\n")
+ log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+ templateFile := ""
+
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+ templateFile = config.Model
+ }
+
+ if config.TemplateConfig.Chat != "" && !processFunctions {
+ templateFile = config.TemplateConfig.Chat
+ }
+
+ if config.TemplateConfig.Functions != "" && processFunctions {
+ templateFile = config.TemplateConfig.Functions
+ }
+
+ if templateFile != "" {
+ templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+ SystemPrompt: config.SystemPrompt,
+ SuppressSystemPrompt: suppressConfigSystemPrompt,
+ Input: predInput,
+ Functions: funcs,
+ })
+ if err == nil {
+ predInput = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
- if contentExists {
- content = fmt.Sprint(i.StringContent)
- }
- if i.FunctionCall != nil {
- marshalAny(i.FunctionCall)
- }
- if i.ToolCalls != nil {
- marshalAny(i.ToolCalls)
- }
- }
- // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
- if contentExists && role == "system" {
- suppressConfigSystemPrompt = true
+ log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
- mess = append(mess, content)
+ log.Debug().Msgf("Prompt (after templating): %s", predInput)
+ if processFunctions {
+ log.Debug().Msgf("Grammar: %+v", config.Grammar)
+ }
}
- predInput = strings.Join(mess, "\n")
- log.Debug().Msgf("Prompt (before templating): %s", predInput)
- if toStream {
+ switch {
+ case toStream:
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
@@ -343,45 +385,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
- }
- templateFile := ""
-
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
- templateFile = config.Model
- }
-
- if config.TemplateConfig.Chat != "" && !processFunctions {
- templateFile = config.TemplateConfig.Chat
- }
-
- if config.TemplateConfig.Functions != "" && processFunctions {
- templateFile = config.TemplateConfig.Functions
- }
-
- if templateFile != "" {
- templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
- SystemPrompt: config.SystemPrompt,
- SuppressSystemPrompt: suppressConfigSystemPrompt,
- Input: predInput,
- Functions: funcs,
- })
- if err == nil {
- predInput = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", predInput)
- } else {
- log.Debug().Msgf("Template failed loading: %s", err.Error())
- }
- }
-
- log.Debug().Msgf("Prompt (after templating): %s", predInput)
- if processFunctions {
- log.Debug().Msgf("Grammar: %+v", config.Grammar)
- }
-
- switch {
- case toStream:
responses := make(chan schema.OpenAIResponse)
if !processFunctions {
@@ -563,7 +567,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
images = append(images, m.StringImages...)
}
- predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil)
+ predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err
diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go
index 5d97d21d..06e784b7 100644
--- a/core/http/endpoints/openai/inference.go
+++ b/core/http/endpoints/openai/inference.go
@@ -29,7 +29,7 @@ func ComputeChoices(
}
// get the model function to call for the result
- predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
+ predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
index cc687577..e9afe196 100644
--- a/pkg/grpc/proto/backend.pb.go
+++ b/pkg/grpc/proto/backend.pb.go
@@ -1,7 +1,7 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
-// protoc-gen-go v1.31.0
-// protoc v4.23.4
+// protoc-gen-go v1.26.0
+// protoc v5.26.1
// source: backend.proto
package proto
@@ -532,47 +532,49 @@ type PredictOptions struct {
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
- Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"`
- Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"`
- Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"`
- Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"`
- TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"`
- Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"`
- Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"`
- NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"`
- Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"`
- Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"`
- F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"`
- DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"`
- StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"`
- IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"`
- TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"`
- TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"`
- FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"`
- PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"`
- Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"`
- MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"`
- MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"`
- PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"`
- LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"`
- MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"`
- MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"`
- PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"`
- PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"`
- Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"`
- MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
- TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
- TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"`
- PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"`
- Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"`
- EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"`
- Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
- RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"`
- RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
- NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"`
- NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"`
- NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"`
- Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"`
+ Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"`
+ Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"`
+ Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"`
+ Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"`
+ TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"`
+ Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"`
+ Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"`
+ NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"`
+ Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"`
+ Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"`
+ F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"`
+ DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"`
+ StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"`
+ IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"`
+ TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"`
+ TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"`
+ FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"`
+ PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"`
+ Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"`
+ MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"`
+ MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"`
+ PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"`
+ LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"`
+ MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"`
+ MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"`
+ PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"`
+ PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"`
+ Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"`
+ MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
+ TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
+ TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"`
+ PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"`
+ Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"`
+ EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"`
+ Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
+ RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"`
+ RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
+ NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"`
+ NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"`
+ NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"`
+ Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"`
+ UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"`
+ Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"`
}
func (x *PredictOptions) Reset() {
@@ -894,6 +896,20 @@ func (x *PredictOptions) GetImages() []string {
return nil
}
+func (x *PredictOptions) GetUseTokenizerTemplate() bool {
+ if x != nil {
+ return x.UseTokenizerTemplate
+ }
+ return false
+}
+
+func (x *PredictOptions) GetMessages() []*Message {
+ if x != nil {
+ return x.Messages
+ }
+ return nil
+}
+
// The response message containing the result
type Reply struct {
state protoimpl.MessageState
@@ -2080,6 +2096,61 @@ func (x *StatusResponse) GetMemory() *MemoryUsageData {
return nil
}
+type Message struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"`
+ Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
+}
+
+func (x *Message) Reset() {
+ *x = Message{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_backend_proto_msgTypes[22]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *Message) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Message) ProtoMessage() {}
+
+func (x *Message) ProtoReflect() protoreflect.Message {
+ mi := &file_backend_proto_msgTypes[22]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use Message.ProtoReflect.Descriptor instead.
+func (*Message) Descriptor() ([]byte, []int) {
+ return file_backend_proto_rawDescGZIP(), []int{22}
+}
+
+func (x *Message) GetRole() string {
+ if x != nil {
+ return x.Role
+ }
+ return ""
+}
+
+func (x *Message) GetContent() string {
+ if x != nil {
+ return x.Content
+ }
+ return ""
+}
+
var File_backend_proto protoreflect.FileDescriptor
var file_backend_proto_rawDesc = []byte{
@@ -2125,7 +2196,7 @@ var file_backend_proto_rawDesc = []byte{
0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02,
0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f,
0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22,
- 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
+ 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01,
0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65,
0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18,
@@ -2204,257 +2275,267 @@ var file_backend_proto_rawDesc = []byte{
0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74,
0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16,
0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06,
- 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12,
- 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c,
- 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f,
- 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f,
- 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
- 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18,
- 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69,
- 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68,
- 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c,
- 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28,
- 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05,
- 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f,
- 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08,
- 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f,
- 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62,
- 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18,
- 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e,
- 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01,
- 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12,
- 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55,
- 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73,
- 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65,
- 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b,
- 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18,
- 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72,
- 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72,
- 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
- 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f,
- 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f,
- 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28,
- 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65,
- 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13,
- 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73,
- 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
- 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c,
- 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69,
- 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73,
- 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55,
- 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65,
- 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52,
- 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a,
- 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
- 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73,
- 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69,
- 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24,
- 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18,
- 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72,
- 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01,
- 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53,
- 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53,
- 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18,
- 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c,
- 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d,
- 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64,
- 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21,
- 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e,
- 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c,
- 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08,
- 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08,
- 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61,
- 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c,
- 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f,
- 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c,
- 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75,
- 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d,
- 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d,
- 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66,
- 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50,
- 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f,
- 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61,
- 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e,
- 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d,
- 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e,
- 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72,
- 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f,
- 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18,
- 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f,
- 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63,
- 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e,
- 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77,
- 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53,
- 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d,
- 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d,
- 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d,
- 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72,
- 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e,
- 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61,
- 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46,
- 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72,
- 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61,
- 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01,
- 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74,
- 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61,
- 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65,
- 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65,
- 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61,
- 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79,
- 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c,
- 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
- 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
- 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20,
- 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f,
- 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12,
- 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20,
- 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22,
- 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71,
- 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61,
- 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61,
- 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20,
- 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10,
- 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
- 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03,
- 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61,
- 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08,
- 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74,
- 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11,
- 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e,
- 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69,
- 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03,
- 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03,
- 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78,
- 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a,
- 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74,
- 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61,
- 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16,
- 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06,
- 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18,
- 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04,
- 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65,
- 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
- 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01,
- 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69,
- 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70,
- 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72,
- 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61,
- 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73,
- 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03,
- 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a,
- 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65,
- 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65,
- 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c,
- 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c,
- 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71,
- 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65,
- 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10,
- 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74,
- 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52,
- 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
- 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16,
- 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06,
- 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
- 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac,
- 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61,
- 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28,
- 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61,
- 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61,
- 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67,
- 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45,
- 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a,
- 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72,
- 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03,
- 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01,
- 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01,
- 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65,
- 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32,
- 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73,
- 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05,
- 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18,
- 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52,
- 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65,
- 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45,
- 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a,
- 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f,
- 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xfb, 0x06, 0x0a,
- 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c,
- 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61,
- 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07,
- 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b,
+ 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b,
+ 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
+ 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65,
+ 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08,
+ 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c,
+ 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01,
+ 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c,
+ 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05,
+ 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64,
+ 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a,
+ 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74,
+ 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01,
+ 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74,
+ 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68,
+ 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20,
+ 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14,
+ 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d,
+ 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01,
+ 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61,
+ 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63,
+ 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41,
+ 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d,
+ 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a,
+ 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73,
+ 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04,
+ 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65,
+ 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61,
+ 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18,
+ 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20,
+ 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74,
+ 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28,
+ 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69,
+ 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18,
+ 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65,
+ 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65,
+ 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c,
+ 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d,
+ 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20,
+ 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61,
+ 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73,
+ 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45,
+ 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05,
+ 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46,
+ 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
+ 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16,
+ 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09,
+ 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52,
+ 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f,
+ 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65,
+ 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
+ 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46,
+ 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c,
+ 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01,
+ 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65,
+ 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70,
+ 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c,
+ 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c,
+ 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46,
+ 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46,
+ 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d,
+ 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47,
+ 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24,
+ 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18,
+ 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f,
+ 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
+ 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
+ 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30,
+ 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74,
+ 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a,
+ 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09,
+ 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f,
+ 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52,
+ 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09,
+ 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52,
+ 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f,
+ 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e,
+ 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66,
+ 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72,
+ 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69,
+ 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64,
+ 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69,
+ 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75,
+ 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50,
+ 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69,
+ 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d,
+ 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28,
+ 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64,
+ 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65,
+ 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f,
+ 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c,
+ 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09,
+ 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61,
+ 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06,
+ 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d,
+ 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c,
+ 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53,
+ 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78,
+ 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59,
+ 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e,
+ 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d,
+ 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61,
+ 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61,
+ 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e,
+ 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e,
+ 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c,
+ 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04,
+ 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65,
+ 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65,
+ 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73,
+ 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18,
+ 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31,
+ 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c,
+ 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18,
+ 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67,
+ 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52,
+ 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67,
+ 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67,
+ 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18,
+ 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e,
+ 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75,
+ 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01,
+ 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54,
+ 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
+ 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65,
+ 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77,
+ 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d,
+ 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01,
+ 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64,
+ 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74,
+ 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12,
+ 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52,
+ 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65,
+ 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
+ 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05,
+ 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74,
+ 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12,
+ 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f,
+ 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05,
+ 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05,
+ 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f,
+ 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20,
+ 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f,
+ 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f,
+ 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65,
+ 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03,
+ 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10,
+ 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63,
+ 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65,
+ 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62,
+ 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08,
+ 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08,
+ 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52,
+ 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01,
+ 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f,
+ 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
+ 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64,
+ 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28,
+ 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65,
+ 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65,
+ 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05,
+ 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65,
+ 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
+ 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65,
+ 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20,
+ 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72,
+ 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73,
+ 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77,
+ 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77,
+ 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e,
+ 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
+ 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02,
+ 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22,
+ 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e,
+ 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
+ 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74,
+ 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65,
+ 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72,
+ 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74,
+ 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61,
+ 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49,
+ 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12,
+ 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52,
+ 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37,
+ 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c,
+ 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a,
+ 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07,
+ 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b,
+ 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65,
+ 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
+ 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69,
+ 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65,
+ 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61,
+ 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a,
+ 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63,
+ 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e,
+ 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75,
+ 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53,
+ 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
+ 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e,
+ 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00,
+ 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12,
+ 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63,
+ 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75,
+ 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65,
+ 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
+ 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71,
+ 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52,
+ 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f,
+ 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
+ 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b,
+ 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65,
+ 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e,
+ 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65,
+ 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73,
+ 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
+ 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73,
- 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79,
- 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12,
- 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f,
+ 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
+ 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22,
+ 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61,
+ 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73,
+ 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74,
+ 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39,
+ 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61,
+ 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f,
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
- 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65,
- 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69,
- 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
- 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64,
- 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50,
- 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e,
- 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e,
- 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61,
- 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12,
- 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69,
- 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61,
- 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19,
- 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72,
- 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54,
- 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53,
- 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f,
- 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70,
- 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70,
- 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73,
- 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74,
- 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73,
- 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74,
- 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65,
- 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61,
- 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f,
- 0x0a, 0x0c, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c,
- 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44,
- 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12,
- 0x42, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74,
- 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c,
- 0x74, 0x22, 0x00, 0x12, 0x45, 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e,
- 0x64, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72,
- 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69,
- 0x6e, 0x64, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f,
- 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49,
- 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75,
- 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f,
- 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63,
- 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
+ 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f,
+ 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b,
+ 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65,
+ 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74,
+ 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
+ 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
+ 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
+ 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45,
+ 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62,
+ 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e,
+ 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73,
+ 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e,
+ 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65,
+ 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d,
+ 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c,
+ 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74,
+ 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (
@@ -2470,7 +2551,7 @@ func file_backend_proto_rawDescGZIP() []byte {
}
var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 23)
+var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24)
var file_backend_proto_goTypes = []interface{}{
(StatusResponse_State)(0), // 0: backend.StatusResponse.State
(*StoresKey)(nil), // 1: backend.StoresKey
@@ -2495,7 +2576,8 @@ var file_backend_proto_goTypes = []interface{}{
(*TokenizationResponse)(nil), // 20: backend.TokenizationResponse
(*MemoryUsageData)(nil), // 21: backend.MemoryUsageData
(*StatusResponse)(nil), // 22: backend.StatusResponse
- nil, // 23: backend.MemoryUsageData.BreakdownEntry
+ (*Message)(nil), // 23: backend.Message
+ nil, // 24: backend.MemoryUsageData.BreakdownEntry
}
var file_backend_proto_depIdxs = []int32{
1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey
@@ -2507,43 +2589,44 @@ var file_backend_proto_depIdxs = []int32{
1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey
1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey
2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue
- 17, // 9: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
- 23, // 10: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
- 0, // 11: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
- 21, // 12: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
- 9, // 13: backend.Backend.Health:input_type -> backend.HealthMessage
- 10, // 14: backend.Backend.Predict:input_type -> backend.PredictOptions
- 12, // 15: backend.Backend.LoadModel:input_type -> backend.ModelOptions
- 10, // 16: backend.Backend.PredictStream:input_type -> backend.PredictOptions
- 10, // 17: backend.Backend.Embedding:input_type -> backend.PredictOptions
- 18, // 18: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
- 15, // 19: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
- 19, // 20: backend.Backend.TTS:input_type -> backend.TTSRequest
- 10, // 21: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
- 9, // 22: backend.Backend.Status:input_type -> backend.HealthMessage
- 3, // 23: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions
- 4, // 24: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions
- 5, // 25: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions
- 7, // 26: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions
- 11, // 27: backend.Backend.Health:output_type -> backend.Reply
- 11, // 28: backend.Backend.Predict:output_type -> backend.Reply
- 13, // 29: backend.Backend.LoadModel:output_type -> backend.Result
- 11, // 30: backend.Backend.PredictStream:output_type -> backend.Reply
- 14, // 31: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
- 13, // 32: backend.Backend.GenerateImage:output_type -> backend.Result
- 16, // 33: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
- 13, // 34: backend.Backend.TTS:output_type -> backend.Result
- 20, // 35: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
- 22, // 36: backend.Backend.Status:output_type -> backend.StatusResponse
- 13, // 37: backend.Backend.StoresSet:output_type -> backend.Result
- 13, // 38: backend.Backend.StoresDelete:output_type -> backend.Result
- 6, // 39: backend.Backend.StoresGet:output_type -> backend.StoresGetResult
- 8, // 40: backend.Backend.StoresFind:output_type -> backend.StoresFindResult
- 27, // [27:41] is the sub-list for method output_type
- 13, // [13:27] is the sub-list for method input_type
- 13, // [13:13] is the sub-list for extension type_name
- 13, // [13:13] is the sub-list for extension extendee
- 0, // [0:13] is the sub-list for field type_name
+ 23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message
+ 17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
+ 24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
+ 0, // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
+ 21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
+ 9, // 14: backend.Backend.Health:input_type -> backend.HealthMessage
+ 10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions
+ 12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions
+ 10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions
+ 10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions
+ 18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
+ 15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
+ 19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest
+ 10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
+ 9, // 23: backend.Backend.Status:input_type -> backend.HealthMessage
+ 3, // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions
+ 4, // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions
+ 5, // 26: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions
+ 7, // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions
+ 11, // 28: backend.Backend.Health:output_type -> backend.Reply
+ 11, // 29: backend.Backend.Predict:output_type -> backend.Reply
+ 13, // 30: backend.Backend.LoadModel:output_type -> backend.Result
+ 11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply
+ 14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
+ 13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result
+ 16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
+ 13, // 35: backend.Backend.TTS:output_type -> backend.Result
+ 20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
+ 22, // 37: backend.Backend.Status:output_type -> backend.StatusResponse
+ 13, // 38: backend.Backend.StoresSet:output_type -> backend.Result
+ 13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result
+ 6, // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult
+ 8, // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult
+ 28, // [28:42] is the sub-list for method output_type
+ 14, // [14:28] is the sub-list for method input_type
+ 14, // [14:14] is the sub-list for extension type_name
+ 14, // [14:14] is the sub-list for extension extendee
+ 0, // [0:14] is the sub-list for field type_name
}
func init() { file_backend_proto_init() }
@@ -2816,6 +2899,18 @@ func file_backend_proto_init() {
return nil
}
}
+ file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} {
+ switch v := v.(*Message); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
}
type x struct{}
out := protoimpl.TypeBuilder{
@@ -2823,7 +2918,7 @@ func file_backend_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_backend_proto_rawDesc,
NumEnums: 1,
- NumMessages: 23,
+ NumMessages: 24,
NumExtensions: 0,
NumServices: 1,
},
diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go
index 0314cd4e..a1f442e0 100644
--- a/pkg/grpc/proto/backend_grpc.pb.go
+++ b/pkg/grpc/proto/backend_grpc.pb.go
@@ -1,7 +1,7 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.3.0
-// - protoc v4.23.4
+// - protoc v5.26.1
// source: backend.proto
package proto
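(For orientation: the field names visible in the regenerated descriptor bytes above — `Message`, `role`, `content` — indicate that `backend.proto` gained roughly `message Message { string role = 1; string content = 2; }`, and the updated `file_backend_proto_depIdxs` entry `backend.PredictOptions.Messages:type_name -> backend.Message` shows that `PredictOptions` now carries a list of these messages. This is a reconstruction from the generated code, not a quote of the `.proto` source.)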
From 099bd54ff21311a17f9a33f80d83da6aaa0bc524 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 19:22:30 +0200
Subject: [PATCH 0130/2750] ci: try to build on macos14 (#2011)
* ci: try to build on macos14
Signed-off-by: Ettore Di Giacinto
* ci: fixup artifact name
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
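For anyone wanting to reproduce the new job locally, a sketch — assuming an arm64 Mac with Homebrew, and mirroring the `avx2` matrix entry rather than the exact runner image:
```bash
# Install the build dependencies the workflow pulls in
brew install protobuf grpc

# Mirror the job's environment for the default ("avx2") matrix entry
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
CMAKE_ARGS="" BUILD_ID="avx2" make dist   # artifacts are collected under release/
```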
---
.github/workflows/release.yaml | 44 ++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index a69a2b05..3c1cea44 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -158,3 +158,47 @@ jobs:
with:
files: |
release/*
+
+
+ build-macOS-arm64:
+ strategy:
+ matrix:
+ include:
+ - build: 'avx2'
+ defines: ''
+ - build: 'avx'
+ defines: '-DLLAMA_AVX2=OFF'
+ - build: 'avx512'
+ defines: '-DLLAMA_AVX512=ON'
+ runs-on: macos-14
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - uses: actions/setup-go@v5
+ with:
+ go-version: '1.21.x'
+ cache: false
+ - name: Dependencies
+ run: |
+ brew install protobuf grpc
+ - name: Build
+ id: build
+ env:
+ CMAKE_ARGS: "${{ matrix.defines }}"
+ BUILD_ID: "${{ matrix.build }}"
+ run: |
+ export C_INCLUDE_PATH=/usr/local/include
+ export CPLUS_INCLUDE_PATH=/usr/local/include
+ make dist
+ - uses: actions/upload-artifact@v4
+ with:
+ name: LocalAI-MacOS-arm64-${{ matrix.build }}
+ path: release/
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ if: startsWith(github.ref, 'refs/tags/')
+ with:
+ files: |
+ release/*
From 70c4f110a49fd2f5f0f216932171f3dd0ae0d443 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 11 Apr 2024 20:18:05 +0200
Subject: [PATCH 0131/2750] Update overview.md
---
docs/content/docs/overview.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 3c3a397d..6aede1d6 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -67,9 +67,9 @@ Start the image with Docker to have a functional clone of OpenAI! 🚀:
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
# Do you have an Nvidia GPU? Use this instead
# CUDA 11
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11
# CUDA 12
-# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12
+# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
```
See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and ways you can run LocalAI!
From da82ce81b5dd139932fb3a8e8fd565f36be79d38 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 18:57:33 +0000
Subject: [PATCH 0132/2750] build(deps): bump github.com/opencontainers/runc
from 1.1.5 to 1.1.12 (#2000)
Bumps [github.com/opencontainers/runc](https://github.com/opencontainers/runc) from 1.1.5 to 1.1.12.
- [Release notes](https://github.com/opencontainers/runc/releases)
- [Changelog](https://github.com/opencontainers/runc/blob/main/CHANGELOG.md)
- [Commits](https://github.com/opencontainers/runc/compare/v1.1.5...v1.1.12)
---
updated-dependencies:
- dependency-name: github.com/opencontainers/runc
dependency-type: indirect
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
go.mod | 2 +-
go.sum | 36 ++----------------------------------
2 files changed, 3 insertions(+), 35 deletions(-)
diff --git a/go.mod b/go.mod
index 298f2d69..99af8ce7 100644
--- a/go.mod
+++ b/go.mod
@@ -109,7 +109,7 @@ require (
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.2 // indirect
- github.com/opencontainers/runc v1.1.5 // indirect
+ github.com/opencontainers/runc v1.1.12 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
diff --git a/go.sum b/go.sum
index 551dd922..a421e79c 100644
--- a/go.sum
+++ b/go.sum
@@ -1,6 +1,5 @@
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
-github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
@@ -38,20 +37,14 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps=
-github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
-github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
-github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg=
github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM=
-github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
-github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
-github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -70,7 +63,6 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
-github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
@@ -108,7 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
@@ -137,7 +128,6 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -180,11 +170,8 @@ github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
-github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394=
@@ -221,10 +208,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
-github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
@@ -259,10 +244,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
-github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs=
-github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
-github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
+github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
+github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
@@ -300,12 +283,10 @@ github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
-github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ=
-github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4=
github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E=
github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA=
@@ -315,7 +296,6 @@ github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
-github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
@@ -337,7 +317,6 @@ github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw
github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
-github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
@@ -349,15 +328,12 @@ github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
-github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
-github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
-github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
@@ -407,7 +383,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
@@ -425,12 +400,10 @@ golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -439,12 +412,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -494,7 +463,6 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
From b2785ff06e3eb7c1d62a6c3921ae706d58c054dd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 00:49:23 +0200
Subject: [PATCH 0133/2750] feat(gallery): support ConfigURLs (#2012)
Signed-off-by: Ettore Di Giacinto
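A minimal sketch of the new flow — assuming a LocalAI instance on localhost:8080, and reusing the Hermes config URL that the test below exercises:
```bash
# Install a model directly from a configuration file URL (the new "config_url" field)
curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{
     "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml"
   }'

# The response carries a job "uuid"; poll until "processed" is true
# (replace <uuid> with the value returned above)
curl http://localhost:8080/models/jobs/<uuid>
```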
---
core/http/api_test.go | 24 +++++++++++++++++++++
core/http/endpoints/localai/gallery.go | 4 +++-
core/services/gallery.go | 5 +++++
docs/content/docs/features/model-gallery.md | 10 ++++++---
pkg/gallery/op.go | 1 +
5 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/core/http/api_test.go b/core/http/api_test.go
index 804c15fe..1553ed21 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -43,6 +43,7 @@ Can you help rephrasing sentences?
type modelApplyRequest struct {
ID string `json:"id"`
URL string `json:"url"`
+ ConfigURL string `json:"config_url"`
Name string `json:"name"`
Overrides map[string]interface{} `json:"overrides"`
}
@@ -366,6 +367,29 @@ var _ = Describe("API test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("llama"))
})
+ It("apply models from config", func() {
+ response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+ ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
+ })
+
+ Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+ uuid := response["uuid"].(string)
+
+ Eventually(func() bool {
+ response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+ return response["processed"].(bool)
+ }, "360s", "10s").Should(Equal(true))
+
+ Eventually(func() []string {
+ models, _ := client.ListModels(context.TODO())
+ modelList := []string{}
+ for _, m := range models.Models {
+ modelList = append(modelList, m.ID)
+ }
+ return modelList
+ }, "360s", "10s").Should(ContainElements("hermes-2-pro-mistral"))
+ })
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go
index 5c295a2a..b693e7c3 100644
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -19,7 +19,8 @@ type ModelGalleryEndpointService struct {
}
type GalleryModel struct {
- ID string `json:"id"`
+ ID string `json:"id"`
+ ConfigURL string `json:"config_url"`
gallery.GalleryModel
}
@@ -64,6 +65,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
Id: uuid.String(),
GalleryName: input.ID,
Galleries: mgs.galleries,
+ ConfigURL: input.ConfigURL,
}
return c.JSON(struct {
ID string `json:"uuid"`
diff --git a/core/services/gallery.go b/core/services/gallery.go
index 826f4573..b068abbb 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -9,6 +9,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/gallery"
+ "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/go-skynet/LocalAI/pkg/utils"
"gopkg.in/yaml.v2"
)
@@ -90,6 +91,9 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
} else {
err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
}
+ } else if op.ConfigURL != "" {
+ startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
+ err = cl.Preload(g.modelPath)
} else {
err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
}
@@ -129,6 +133,7 @@ func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galler
utils.ResetDownloadTimers()
if r.ID == "" {
err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
+
} else {
if strings.Contains(r.ID, "@") {
err = gallery.InstallModelFromGallery(
diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md
index 0d978122..05d15ef4 100644
--- a/docs/content/docs/features/model-gallery.md
+++ b/docs/content/docs/features/model-gallery.md
@@ -146,12 +146,16 @@ In the body of the request you must specify the model configuration file URL (`u
```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
- "url": ""
+ "config_url": ""
}'
# or if from a repository
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
"id": "@"
}'
+# or from a gallery config
+curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
+ "url": ""
+ }'
```
An example that installs openllama can be:
@@ -159,8 +163,8 @@ An example that installs openllama can be:
```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
- "url": "https://github.com/go-skynet/model-gallery/blob/main/openllama_3b.yaml"
- }'
+ "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml"
+ }'
```
The API will return a job `uuid` that you can use to track the job progress:
diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go
index 873c356d..99796812 100644
--- a/pkg/gallery/op.go
+++ b/pkg/gallery/op.go
@@ -5,6 +5,7 @@ type GalleryOp struct {
Id string
Galleries []Gallery
GalleryName string
+ ConfigURL string
}
type GalleryOpStatus struct {
From 677e20756b31ce158b207b246b0ae373f826897e Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 12 Apr 2024 00:49:41 +0200
Subject: [PATCH 0134/2750] :arrow_up: Update ggerganov/llama.cpp (#2014)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index e2e4f211..e15166a8 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8
+CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From e0dee52a2ab811fccc18f309a6c5fefcb4725448 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 00:53:43 -0400
Subject: [PATCH 0135/2750] build(deps): bump the pip group across 4
directories with 8 updates (#2017)
* build(deps): bump the pip group across 4 directories with 8 updates
Bumps the pip group with 1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain).
Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index).
Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory:
| Package | From | To |
| --- | --- | --- |
| [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` |
| [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` |
| [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` |
| [idna](https://github.com/kjd/idna) | `3.4` | `3.7` |
| [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` |
| [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` |
Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit).
Updates `langchain` from 0.0.234 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `langchain` from 0.0.160 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `llama-index` from 0.6.2 to 0.9.36
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36)
Updates `langchain` from 0.0.159 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `aiohttp` from 3.8.4 to 3.9.2
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2)
Updates `certifi` from 2022.12.7 to 2023.7.22
- [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22)
Updates `idna` from 3.4 to 3.7
- [Release notes](https://github.com/kjd/idna/releases)
- [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst)
- [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7)
Updates `requests` from 2.29.0 to 2.31.0
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0)
Updates `urllib3` from 1.26.15 to 1.26.18
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18)
Updates `streamlit` from 1.26.0 to 1.30.0
- [Release notes](https://github.com/streamlit/streamlit/releases)
- [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0)
---
updated-dependencies:
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: llama-index
dependency-type: direct:production
dependency-group: pip
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: aiohttp
dependency-type: direct:production
dependency-group: pip
- dependency-name: certifi
dependency-type: direct:production
dependency-group: pip
- dependency-name: idna
dependency-type: direct:production
dependency-group: pip
- dependency-name: requests
dependency-type: direct:production
dependency-group: pip
- dependency-name: urllib3
dependency-type: direct:production
dependency-group: pip
- dependency-name: streamlit
dependency-type: direct:production
dependency-group: pip
...
Signed-off-by: dependabot[bot]
* Update version.json
The PR appears stuck on a check and needs an arbitrary commit to re-run the security check workflow with write permissions. Bumping the docs version to match the latest release serves as that otherwise-trivial change.
Signed-off-by: Dave
---------
Signed-off-by: dependabot[bot]
Signed-off-by: Dave
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Dave
---
docs/data/version.json | 2 +-
examples/functions/requirements.txt | 2 +-
examples/langchain-chroma/requirements.txt | 4 ++--
.../langchainpy-localai-example/requirements.txt | 12 ++++++------
examples/streamlit-bot/requirements.txt | 2 +-
5 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 1b6a2161..6a618115 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.12.3"
+ "version": "v2.12.4"
}
diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index 7164e011..759c5b03 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.0.234
+langchain==0.1.0
openai==0.27.8
diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index b9e649c5..cdf466b9 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.0.160
+langchain==0.1.0
openai==0.27.6
chromadb==0.3.21
-llama-index==0.6.2
\ No newline at end of file
+llama-index==0.9.36
\ No newline at end of file
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 2de5bcf0..1e63b0bf 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,16 +1,16 @@
-aiohttp==3.8.4
+aiohttp==3.9.2
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
-certifi==2022.12.7
+certifi==2023.7.22
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
-idna==3.4
-langchain==0.0.159
+idna==3.7
+langchain==0.1.0
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
@@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
-requests==2.29.0
+requests==2.31.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
-urllib3==1.26.15
+urllib3==1.26.18
yarl==1.9.2
diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt
index ae527c76..1fcd5093 100644
--- a/examples/streamlit-bot/requirements.txt
+++ b/examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.26.0
+streamlit==1.30.0
requests
\ No newline at end of file
From 7e52c8e21ad3ee054444f90d5b16fd49e3f411b9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 15:27:40 +0200
Subject: [PATCH 0137/2750] Update CONTRIBUTING.md
Signed-off-by: Ettore Di Giacinto
---
CONTRIBUTING.md | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0e237ea7..593ad0ed 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to localAI
+# Contributing to LocalAI
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
@@ -29,8 +29,9 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
2. Navigate to the project directory: `cd LocalAI`
-3. Install the required dependencies: `make prepare`
-4. Run LocalAI: `make run`
+3. Install the required dependencies (see https://localai.io/basics/build/#build-localai-locally)
+4. Build LocalAI: `make build`
+5. Run LocalAI: `./local-ai`
## Contributing
@@ -59,14 +60,29 @@ If you find a bug, have a feature request, or encounter any issues, please check
`make test` cannot handle all the models yet. Please be sure to add a test case for new features or for the parts that were changed.
+### Running AIO tests
+
+All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly. A typical flow is:
+
+```bash
+# Build the LocalAI docker image
+make DOCKER_IMAGE=local-ai docker
+
+# Build the corresponding AIO image
+BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
+
+# Run the AIO e2e tests
+LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
+```
+
## Documentation
-- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)
-
+We welcome contributions to the documentation; please open a new PR or create a new issue. The documentation is available under `docs/` at https://github.com/mudler/LocalAI/tree/master/docs
+
## Community and Communication
- You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
----
\ No newline at end of file
+---
From fb105837bac4b1468db5464ab572bb3ec7e61389 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 15:37:56 +0200
Subject: [PATCH 0138/2750] Update secscan.yaml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/secscan.yaml | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
index 14958070..884b84d5 100644
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -15,13 +15,16 @@ jobs:
steps:
- name: Checkout Source
uses: actions/checkout@v4
+ if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
+ if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@master
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file
+ if: ${{ github.actor != 'dependabot[bot]' }}
uses: github/codeql-action/upload-sarif@v2
with:
# Path to SARIF file relative to the root of the repository
- sarif_file: results.sarif
\ No newline at end of file
+ sarif_file: results.sarif
From 18eea9088a866eab14cd3859af13c96653f89c3a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 15:38:34 +0200
Subject: [PATCH 0139/2750] Update dependabot_auto.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/dependabot_auto.yml | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
index 12541d05..22c709e3 100644
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -1,7 +1,6 @@
name: Dependabot auto-merge
on:
- pull_request_target:
- types: [review_requested]
+- pull_request_target
permissions:
contents: write
From 69d638268b67afed91b15ae5b124255569589a47 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 15:57:13 +0200
Subject: [PATCH 0140/2750] Update dependabot_auto.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/dependabot_auto.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
index 22c709e3..f9d03a30 100644
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -40,4 +40,4 @@ jobs:
run: gh pr merge --auto --merge "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
- GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}}
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
From 0e549424e782e315ee166efdb1cba77a1a4a750b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Apr 2024 15:59:25 +0200
Subject: [PATCH 0141/2750] Update dependabot_auto.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/dependabot_auto.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
index f9d03a30..51337d20 100644
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -37,7 +37,7 @@ jobs:
- name: Enable auto-merge for Dependabot PRs
if: ${{ contains(github.event.pull_request.title, 'bump')}}
- run: gh pr merge --auto --merge "$PR_URL"
+ run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
From fcb63aed8a969a2419ed593d8facdccf3ab88e5f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 15:36:46 +0000
Subject: [PATCH 0142/2750] build(deps): bump follow-redirects from 1.15.2 to
1.15.6 in /examples/langchain/langchainjs-localai-example (#2020)
build(deps): bump follow-redirects
Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.15.2 to 1.15.6.
- [Release notes](https://github.com/follow-redirects/follow-redirects/releases)
- [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.15.2...v1.15.6)
---
updated-dependencies:
- dependency-name: follow-redirects
dependency-type: indirect
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.../langchainjs-localai-example/package-lock.json | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/examples/langchain/langchainjs-localai-example/package-lock.json b/examples/langchain/langchainjs-localai-example/package-lock.json
index 29e6999f..e0a45539 100644
--- a/examples/langchain/langchainjs-localai-example/package-lock.json
+++ b/examples/langchain/langchainjs-localai-example/package-lock.json
@@ -369,9 +369,9 @@
}
},
"node_modules/follow-redirects": {
- "version": "1.15.2",
- "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
- "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
+ "version": "1.15.6",
+ "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
+ "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"funding": [
{
"type": "individual",
@@ -1479,9 +1479,9 @@
"integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ=="
},
"follow-redirects": {
- "version": "1.15.2",
- "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
- "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
+ "version": "1.15.6",
+ "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
+ "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA=="
},
"form-data": {
"version": "4.0.0",
From 912d2dccfa63a3a8e6720dda73e30cf8f7d6b944 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 13 Apr 2024 09:13:00 +0200
Subject: [PATCH 0143/2750] :arrow_up: Update ggerganov/llama.cpp (#2024)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index e15166a8..0f6d8fd2 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f
+CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From a8ebf6f575c502684e9f5118cc99622546f73438 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Sat, 13 Apr 2024 02:14:32 -0500
Subject: [PATCH 0144/2750] fix: respect concurrency from parent build
parameters when building GRPC (#2023)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
backend/cpp/grpc/Makefile | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/backend/cpp/grpc/Makefile b/backend/cpp/grpc/Makefile
index 6a181794..5308693b 100644
--- a/backend/cpp/grpc/Makefile
+++ b/backend/cpp/grpc/Makefile
@@ -5,7 +5,6 @@ SYSTEM ?= $(HOST_SYSTEM)
TAG_LIB_GRPC?=v1.59.0
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
GIT_CLONE_DEPTH?=1
-NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
INSTALLED_PACKAGES=installed_packages
GRPC_REPO=grpc_repo
@@ -52,7 +51,7 @@ $(GRPC_REPO):
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
- cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
+ cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
build: $(INSTALLED_PACKAGES)
From 1981154f49437adcbcb9956611aee4809b406947 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Sat, 13 Apr 2024 02:37:32 -0500
Subject: [PATCH 0145/2750] fix: don't commit generated files to git (#1993)
* fix: initial work towards not committing generated files to the repository
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* feat: improve build docs
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: remove unused folder from .dockerignore and .gitignore
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: attempt to fix extra backend tests
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: attempt to fix other tests
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: more test fixes
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: fix apple tests
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: more extras tests fixes
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: add GOBIN to PATH in docker build
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: extra tests and Dockerfile corrections
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: remove build dependency checks
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: add golang protobuf compilers to tests-linux action
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: ensure protogen is run for extra backend installs
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: use newer protobuf
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: more missing protoc binaries
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: missing dependencies during docker build
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: don't install grpc compilers in the final stage if they aren't needed
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: python-grpc-tools in 22.04 repos is too old
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: add a couple of extra build dependencies to Makefile
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: unbreak container rebuild functionality
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---------
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.dockerignore | 2 +
.github/workflows/test-extra.yml | 29 +-
.github/workflows/test.yml | 25 +-
.gitignore | 5 +
Dockerfile | 33 +-
Makefile | 144 +-
backend/backend_grpc.pb.go | 457 ---
backend/python/autogptq/Makefile | 11 +-
backend/python/autogptq/backend_pb2.py | 79 -
backend/python/autogptq/backend_pb2_grpc.py | 495 ---
backend/python/bark/Makefile | 16 +-
backend/python/bark/backend_pb2.py | 79 -
backend/python/bark/backend_pb2_grpc.py | 495 ---
backend/python/coqui/Makefile | 16 +-
backend/python/coqui/backend_pb2.py | 79 -
backend/python/coqui/backend_pb2_grpc.py | 495 ---
backend/python/diffusers/Makefile | 16 +-
backend/python/diffusers/backend_pb2.py | 79 -
backend/python/diffusers/backend_pb2_grpc.py | 495 ---
backend/python/exllama/Makefile | 14 +-
backend/python/exllama/backend_pb2.py | 79 -
backend/python/exllama/backend_pb2_grpc.py | 495 ---
backend/python/exllama2/Makefile | 14 +-
backend/python/exllama2/backend_pb2.py | 79 -
backend/python/exllama2/backend_pb2_grpc.py | 495 ---
backend/python/mamba/Makefile | 18 +-
backend/python/mamba/backend_pb2.py | 79 -
backend/python/mamba/backend_pb2_grpc.py | 495 ---
backend/python/petals/Makefile | 16 +-
backend/python/petals/backend_pb2.py | 79 -
backend/python/petals/backend_pb2_grpc.py | 495 ---
backend/python/sentencetransformers/Makefile | 16 +-
.../sentencetransformers/backend_pb2.py | 79 -
.../sentencetransformers/backend_pb2_grpc.py | 495 ---
backend/python/transformers-musicgen/Makefile | 17 +-
.../transformers-musicgen/backend_pb2.py | 79 -
.../transformers-musicgen/backend_pb2_grpc.py | 495 ---
backend/python/transformers/Makefile | 16 +-
backend/python/transformers/backend_pb2.py | 79 -
.../python/transformers/backend_pb2_grpc.py | 495 ---
backend/python/vall-e-x/Makefile | 16 +-
backend/python/vall-e-x/backend_pb2.py | 79 -
backend/python/vall-e-x/backend_pb2_grpc.py | 495 ---
backend/python/vllm/Makefile | 18 +-
backend/python/vllm/backend_pb2.py | 79 -
backend/python/vllm/backend_pb2_grpc.py | 495 ---
docs/content/docs/getting-started/build.md | 18 +-
pkg/grpc/proto/backend.pb.go | 2934 -----------------
pkg/grpc/proto/backend_grpc.pb.go | 618 ----
49 files changed, 381 insertions(+), 11550 deletions(-)
delete mode 100644 backend/backend_grpc.pb.go
delete mode 100644 backend/python/autogptq/backend_pb2.py
delete mode 100644 backend/python/autogptq/backend_pb2_grpc.py
delete mode 100644 backend/python/bark/backend_pb2.py
delete mode 100644 backend/python/bark/backend_pb2_grpc.py
delete mode 100644 backend/python/coqui/backend_pb2.py
delete mode 100644 backend/python/coqui/backend_pb2_grpc.py
delete mode 100644 backend/python/diffusers/backend_pb2.py
delete mode 100644 backend/python/diffusers/backend_pb2_grpc.py
delete mode 100644 backend/python/exllama/backend_pb2.py
delete mode 100644 backend/python/exllama/backend_pb2_grpc.py
delete mode 100644 backend/python/exllama2/backend_pb2.py
delete mode 100644 backend/python/exllama2/backend_pb2_grpc.py
delete mode 100644 backend/python/mamba/backend_pb2.py
delete mode 100644 backend/python/mamba/backend_pb2_grpc.py
delete mode 100644 backend/python/petals/backend_pb2.py
delete mode 100644 backend/python/petals/backend_pb2_grpc.py
delete mode 100644 backend/python/sentencetransformers/backend_pb2.py
delete mode 100644 backend/python/sentencetransformers/backend_pb2_grpc.py
delete mode 100644 backend/python/transformers-musicgen/backend_pb2.py
delete mode 100644 backend/python/transformers-musicgen/backend_pb2_grpc.py
delete mode 100644 backend/python/transformers/backend_pb2.py
delete mode 100644 backend/python/transformers/backend_pb2_grpc.py
delete mode 100644 backend/python/vall-e-x/backend_pb2.py
delete mode 100644 backend/python/vall-e-x/backend_pb2_grpc.py
delete mode 100644 backend/python/vllm/backend_pb2.py
delete mode 100644 backend/python/vllm/backend_pb2_grpc.py
delete mode 100644 pkg/grpc/proto/backend.pb.go
delete mode 100644 pkg/grpc/proto/backend_grpc.pb.go
diff --git a/.dockerignore b/.dockerignore
index 97e8aa34..2c394c48 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,6 @@
.idea
+.github
+.vscode
models
examples/chatbot-ui/models
examples/rwkv/models
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 7689f06d..7705783e 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -32,8 +32,9 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
@@ -61,8 +62,9 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
@@ -90,8 +92,9 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
@@ -120,8 +123,9 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
@@ -151,8 +155,9 @@ jobs:
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
- # sudo apt-get install -y ca-certificates cmake curl patch
+ # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
+ # pip install --user grpcio-tools
# sudo rm -rfv /usr/bin/conda || true
@@ -222,8 +227,9 @@ jobs:
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
- # sudo apt-get install -y ca-certificates cmake curl patch
+ # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
+ # pip install --user grpcio-tools
# sudo rm -rfv /usr/bin/conda || true
@@ -254,8 +260,9 @@ jobs:
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
- # sudo apt-get install -y ca-certificates cmake curl patch
+ # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
+ # pip install --user grpcio-tools
# sudo rm -rfv /usr/bin/conda || true
# - name: Test vllm
# run: |
@@ -280,8 +287,9 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
- sudo apt-get install -y libopencv-dev
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
- name: Test vall-e-x
run: |
@@ -307,7 +315,8 @@ jobs:
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
+ sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
+ pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
- name: Test coqui
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 02093b3f..46c4e065 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -70,17 +70,27 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install build-essential ffmpeg
+ sudo apt-get install build-essential curl ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
- gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
- sudo apt-get install -y ca-certificates cmake curl patch
+ sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
sudo apt-get install -y libopencv-dev
-
+
+ curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+ unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+ rm protoc.zip
+
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+
+ # The python3-grpc-tools package in 22.04 is too old
+ pip install --user grpcio-tools
+
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
@@ -89,7 +99,7 @@ jobs:
GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncnn)
- GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+ PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -108,7 +118,7 @@ jobs:
cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
- GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
+ PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
@@ -186,7 +196,8 @@ jobs:
run: go version
- name: Dependencies
run: |
- brew install protobuf grpc make
+ brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
+ pip install --user grpcio-tools
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
diff --git a/.gitignore b/.gitignore
index b48f7391..f1f860e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,8 @@ backend-assets/*
!backend-assets/.keep
prepare
/ggml-metal.metal
+
+# Protobuf generated files
+*.pb.go
+*pb2.py
+*pb2_grpc.py
diff --git a/Dockerfile b/Dockerfile
index 5fb6230c..d0217d50 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,12 +20,25 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface
ARG GO_TAGS="stablediffusion tinydream tts"
RUN apt-get update && \
- apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
+ apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
# Install Go
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin
+# Install grpc compilers
+ENV PATH $PATH:/root/go/bin
+RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+
+# Install protobuf (the version in 22.04 is too old)
+RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+ unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+ rm protoc.zip
+
+# Install grpcio-tools (the version in 22.04 is too old)
+RUN pip install --user grpcio-tools
+
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@@ -68,7 +81,8 @@ RUN test -n "$TARGETARCH" \
FROM requirements-core as requirements-extras
-RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+RUN apt install -y gpg && \
+ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
@@ -100,7 +114,7 @@ ENV MAKEFLAGS=${MAKEFLAGS}
WORKDIR /build
RUN apt-get update && \
- apt-get install -y g++ cmake git && \
+ apt-get install -y build-essential cmake git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -133,6 +147,12 @@ WORKDIR /build
COPY . .
COPY .git .
RUN echo "GO_TAGS: $GO_TAGS"
+
+RUN apt-get update && \
+ apt-get install -y build-essential cmake git && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
RUN make prepare
# If we are building with clblas support, we need the libraries for the builds
@@ -191,6 +211,11 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
apt-get clean \
; fi
+RUN apt-get update && \
+ apt-get install -y cmake git && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
WORKDIR /build
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
@@ -202,7 +227,7 @@ COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=grpc /build/grpc ./grpc/
-RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
+RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
# Copy the binary
COPY --from=builder /build/local-ai ./
diff --git a/Makefile b/Makefile
index 0f6d8fd2..5932dfb2 100644
--- a/Makefile
+++ b/Makefile
@@ -289,10 +289,12 @@ clean: ## Remove build related file
rm -rf ./sources
rm -rf $(BINARY_NAME)
rm -rf release/
- rm -rf backend-assets
+ rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/cpp/llama clean
$(MAKE) dropreplace
+ $(MAKE) protogen-clean
+ rmdir pkg/grpc/proto || true
clean-tests:
rm -rf test-models
@@ -416,30 +418,136 @@ help: ## Show this help.
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
}' $(MAKEFILE_LIST)
+.PHONY: protogen
protogen: protogen-go protogen-python
+.PHONY: protogen-clean
+protogen-clean: protogen-go-clean protogen-python-clean
+
+.PHONY: protogen-go
protogen-go:
+ mkdir -p pkg/grpc/proto
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
-protogen-python:
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
- python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
+.PHONY: protogen-go-clean
+protogen-go-clean:
+ $(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
+ $(RM) bin/*
+
+.PHONY: protogen-python
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
+
+.PHONY: protogen-python-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
+
+.PHONY: autogptq-protogen
+autogptq-protogen:
+ $(MAKE) -C backend/python/autogptq protogen
+
+.PHONY: autogptq-protogen-clean
+autogptq-protogen-clean:
+ $(MAKE) -C backend/python/autogptq protogen-clean
+
+.PHONY: bark-protogen
+bark-protogen:
+ $(MAKE) -C backend/python/bark protogen
+
+.PHONY: bark-protogen-clean
+bark-protogen-clean:
+ $(MAKE) -C backend/python/bark protogen-clean
+
+.PHONY: coqui-protogen
+coqui-protogen:
+ $(MAKE) -C backend/python/coqui protogen
+
+.PHONY: coqui-protogen-clean
+coqui-protogen-clean:
+ $(MAKE) -C backend/python/coqui protogen-clean
+
+.PHONY: diffusers-protogen
+diffusers-protogen:
+ $(MAKE) -C backend/python/diffusers protogen
+
+.PHONY: diffusers-protogen-clean
+diffusers-protogen-clean:
+ $(MAKE) -C backend/python/diffusers protogen-clean
+
+.PHONY: exllama-protogen
+exllama-protogen:
+ $(MAKE) -C backend/python/exllama protogen
+
+.PHONY: exllama-protogen-clean
+exllama-protogen-clean:
+ $(MAKE) -C backend/python/exllama protogen-clean
+
+.PHONY: exllama2-protogen
+exllama2-protogen:
+ $(MAKE) -C backend/python/exllama2 protogen
+
+.PHONY: exllama2-protogen-clean
+exllama2-protogen-clean:
+ $(MAKE) -C backend/python/exllama2 protogen-clean
+
+.PHONY: mamba-protogen
+mamba-protogen:
+ $(MAKE) -C backend/python/mamba protogen
+
+.PHONY: mamba-protogen-clean
+mamba-protogen-clean:
+ $(MAKE) -C backend/python/mamba protogen-clean
+
+.PHONY: petals-protogen
+petals-protogen:
+ $(MAKE) -C backend/python/petals protogen
+
+.PHONY: petals-protogen-clean
+petals-protogen-clean:
+ $(MAKE) -C backend/python/petals protogen-clean
+
+.PHONY: sentencetransformers-protogen
+sentencetransformers-protogen:
+ $(MAKE) -C backend/python/sentencetransformers protogen
+
+.PHONY: sentencetransformers-protogen-clean
+sentencetransformers-protogen-clean:
+ $(MAKE) -C backend/python/sentencetransformers protogen-clean
+
+.PHONY: transformers-protogen
+transformers-protogen:
+ $(MAKE) -C backend/python/transformers protogen
+
+.PHONY: transformers-protogen-clean
+transformers-protogen-clean:
+ $(MAKE) -C backend/python/transformers protogen-clean
+
+.PHONY: transformers-musicgen-protogen
+transformers-musicgen-protogen:
+ $(MAKE) -C backend/python/transformers-musicgen protogen
+
+.PHONY: transformers-musicgen-protogen-clean
+transformers-musicgen-protogen-clean:
+ $(MAKE) -C backend/python/transformers-musicgen protogen-clean
+
+.PHONY: vall-e-x-protogen
+vall-e-x-protogen:
+ $(MAKE) -C backend/python/vall-e-x protogen
+
+.PHONY: vall-e-x-protogen-clean
+vall-e-x-protogen-clean:
+ $(MAKE) -C backend/python/vall-e-x protogen-clean
+
+.PHONY: vllm-protogen
+vllm-protogen:
+ $(MAKE) -C backend/python/vllm protogen
+
+.PHONY: vllm-protogen-clean
+vllm-protogen-clean:
+ $(MAKE) -C backend/python/vllm protogen-clean
## GRPC
# Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments:
+prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
@@ -454,7 +562,7 @@ prepare-extra-conda-environments:
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
-prepare-test-extra:
+prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers
@@ -478,7 +586,7 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
-backend-assets/grpc: replace
+backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
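With backend-assets/grpc now depending on protogen-go, the Go bindings under pkg/grpc/proto are regenerated at build time instead of being read from the repository. For orientation, a minimal sketch of a process serving the regenerated Backend API (the listen address and health payload are hypothetical; only Health is overridden, everything else falls back to the generated UnimplementedBackendServer):

    package main

    import (
        "context"
        "log"
        "net"

        "google.golang.org/grpc"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    // healthOnlyBackend embeds UnimplementedBackendServer so every RPC it
    // does not override returns codes.Unimplemented instead of failing to
    // compile when the regenerated interface grows new methods.
    type healthOnlyBackend struct {
        pb.UnimplementedBackendServer
    }

    func (healthOnlyBackend) Health(_ context.Context, _ *pb.HealthMessage) (*pb.Reply, error) {
        return &pb.Reply{Message: []byte("OK")}, nil
    }

    func main() {
        lis, err := net.Listen("tcp", "127.0.0.1:50051") // hypothetical address
        if err != nil {
            log.Fatal(err)
        }
        s := grpc.NewServer()
        pb.RegisterBackendServer(s, healthOnlyBackend{})
        log.Fatal(s.Serve(lis))
    }

Embedding UnimplementedBackendServer is exactly what the generated mustEmbedUnimplementedBackendServer() constraint enforces, so regenerating the bindings stays backward compatible for existing backends.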
diff --git a/backend/backend_grpc.pb.go b/backend/backend_grpc.pb.go
deleted file mode 100644
index 5c97691d..00000000
--- a/backend/backend_grpc.pb.go
+++ /dev/null
@@ -1,457 +0,0 @@
-// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
-// versions:
-// - protoc-gen-go-grpc v1.2.0
-// - protoc v4.23.4
-// source: backend/backend.proto
-
-package proto
-
-import (
- context "context"
- grpc "google.golang.org/grpc"
- codes "google.golang.org/grpc/codes"
- status "google.golang.org/grpc/status"
-)
-
-// This is a compile-time assertion to ensure that this generated file
-// is compatible with the grpc package it is being compiled against.
-// Requires gRPC-Go v1.32.0 or later.
-const _ = grpc.SupportPackageIsVersion7
-
-// BackendClient is the client API for Backend service.
-//
-// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
-type BackendClient interface {
- Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
- Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
- LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
- PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
- Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
- GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
- AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
- TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
- TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
- Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
-}
-
-type backendClient struct {
- cc grpc.ClientConnInterface
-}
-
-func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
- return &backendClient{cc}
-}
-
-func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
- out := new(Reply)
- err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
- out := new(Reply)
- err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
- stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
- if err != nil {
- return nil, err
- }
- x := &backendPredictStreamClient{stream}
- if err := x.ClientStream.SendMsg(in); err != nil {
- return nil, err
- }
- if err := x.ClientStream.CloseSend(); err != nil {
- return nil, err
- }
- return x, nil
-}
-
-type Backend_PredictStreamClient interface {
- Recv() (*Reply, error)
- grpc.ClientStream
-}
-
-type backendPredictStreamClient struct {
- grpc.ClientStream
-}
-
-func (x *backendPredictStreamClient) Recv() (*Reply, error) {
- m := new(Reply)
- if err := x.ClientStream.RecvMsg(m); err != nil {
- return nil, err
- }
- return m, nil
-}
-
-func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
- out := new(EmbeddingResult)
- err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
- out := new(TranscriptResult)
- err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
- out := new(TokenizationResponse)
- err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
- out := new(StatusResponse)
- err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-// BackendServer is the server API for Backend service.
-// All implementations must embed UnimplementedBackendServer
-// for forward compatibility
-type BackendServer interface {
- Health(context.Context, *HealthMessage) (*Reply, error)
- Predict(context.Context, *PredictOptions) (*Reply, error)
- LoadModel(context.Context, *ModelOptions) (*Result, error)
- PredictStream(*PredictOptions, Backend_PredictStreamServer) error
- Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
- GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
- AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
- TTS(context.Context, *TTSRequest) (*Result, error)
- TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
- Status(context.Context, *HealthMessage) (*StatusResponse, error)
- mustEmbedUnimplementedBackendServer()
-}
-
-// UnimplementedBackendServer must be embedded to have forward compatible implementations.
-type UnimplementedBackendServer struct {
-}
-
-func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
-}
-func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
-}
-func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
-}
-func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
- return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
-}
-func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
-}
-func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
-}
-func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
-}
-func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
-}
-func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
- return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
-}
-func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
-}
-func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
-
-// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
-// Use of this interface is not recommended, as added methods to BackendServer will
-// result in compilation errors.
-type UnsafeBackendServer interface {
- mustEmbedUnimplementedBackendServer()
-}
-
-func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
- s.RegisterService(&Backend_ServiceDesc, srv)
-}
-
-func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(HealthMessage)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Health(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/Health",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Predict(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/Predict",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(ModelOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).LoadModel(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/LoadModel",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
- m := new(PredictOptions)
- if err := stream.RecvMsg(m); err != nil {
- return err
- }
- return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
-}
-
-type Backend_PredictStreamServer interface {
- Send(*Reply) error
- grpc.ServerStream
-}
-
-type backendPredictStreamServer struct {
- grpc.ServerStream
-}
-
-func (x *backendPredictStreamServer) Send(m *Reply) error {
- return x.ServerStream.SendMsg(m)
-}
-
-func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Embedding(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/Embedding",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(GenerateImageRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).GenerateImage(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/GenerateImage",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(TranscriptRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).AudioTranscription(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/AudioTranscription",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(TTSRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).TTS(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/TTS",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).TokenizeString(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/TokenizeString",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(HealthMessage)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Status(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: "/backend.Backend/Status",
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
-// It's only intended for direct use with grpc.RegisterService,
-// and not to be introspected or modified (even as a copy)
-var Backend_ServiceDesc = grpc.ServiceDesc{
- ServiceName: "backend.Backend",
- HandlerType: (*BackendServer)(nil),
- Methods: []grpc.MethodDesc{
- {
- MethodName: "Health",
- Handler: _Backend_Health_Handler,
- },
- {
- MethodName: "Predict",
- Handler: _Backend_Predict_Handler,
- },
- {
- MethodName: "LoadModel",
- Handler: _Backend_LoadModel_Handler,
- },
- {
- MethodName: "Embedding",
- Handler: _Backend_Embedding_Handler,
- },
- {
- MethodName: "GenerateImage",
- Handler: _Backend_GenerateImage_Handler,
- },
- {
- MethodName: "AudioTranscription",
- Handler: _Backend_AudioTranscription_Handler,
- },
- {
- MethodName: "TTS",
- Handler: _Backend_TTS_Handler,
- },
- {
- MethodName: "TokenizeString",
- Handler: _Backend_TokenizeString_Handler,
- },
- {
- MethodName: "Status",
- Handler: _Backend_Status_Handler,
- },
- },
- Streams: []grpc.StreamDesc{
- {
- StreamName: "PredictStream",
- Handler: _Backend_PredictStream_Handler,
- ServerStreams: true,
- },
- },
- Metadata: "backend/backend.proto",
-}
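The generated client deleted above is recreated verbatim by the protogen-go target. A minimal sketch of a caller against the regenerated bindings, assuming a hypothetical address, timeout, and prompt (the import path matches the proto's go_package option; Health, PredictStream, and Recv mirror the signatures shown above):

    package main

    import (
        "context"
        "io"
        "log"
        "time"

        "google.golang.org/grpc"
        "google.golang.org/grpc/credentials/insecure"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    func main() {
        // Plaintext connection to a hypothetical local backend.
        conn, err := grpc.Dial("127.0.0.1:50051",
            grpc.WithTransportCredentials(insecure.NewCredentials()))
        if err != nil {
            log.Fatal(err)
        }
        defer conn.Close()

        ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
        defer cancel()
        c := pb.NewBackendClient(conn)

        // Unary health check.
        if _, err := c.Health(ctx, &pb.HealthMessage{}); err != nil {
            log.Fatal(err)
        }

        // Server-streaming prediction: Recv until io.EOF.
        stream, err := c.PredictStream(ctx, &pb.PredictOptions{Prompt: "hello"})
        if err != nil {
            log.Fatal(err)
        }
        for {
            reply, err := stream.Recv()
            if err == io.EOF {
                break // stream finished
            }
            if err != nil {
                log.Fatal(err)
            }
            log.Printf("%s", reply.GetMessage())
        }
    }

Recv returning io.EOF marks the end of the response stream, matching the backendPredictStreamClient.Recv implementation in the deleted file.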
diff --git a/backend/python/autogptq/Makefile b/backend/python/autogptq/Makefile
index dfae12c1..eb81f045 100644
--- a/backend/python/autogptq/Makefile
+++ b/backend/python/autogptq/Makefile
@@ -1,4 +1,13 @@
.PHONY: autogptq
-autogptq:
+autogptq: protogen
$(MAKE) -C ../common-env/transformers
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/autogptq/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/autogptq/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
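
Note: the backend_pb2_grpc.py stubs deleted above (and regenerated on demand by the new protogen targets) define the whole client surface of the Backend service. A minimal usage sketch, assuming grpcio is installed and a backend server is listening on the placeholder address localhost:50051:

import grpc

import backend_pb2
import backend_pb2_grpc

# Open a channel to a running backend and issue the Health RPC declared
# in backend.proto; Reply.message is a bytes field.
with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    reply = stub.Health(backend_pb2.HealthMessage())
    print(reply.message)
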
diff --git a/backend/python/bark/Makefile b/backend/python/bark/Makefile
index 68f73b29..a16308f7 100644
--- a/backend/python/bark/Makefile
+++ b/backend/python/bark/Makefile
@@ -1,15 +1,25 @@
.PHONY: ttsbark
-ttsbark:
+ttsbark: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running bark..."
bash run.sh
@echo "bark run."
.PHONY: test
-test:
+test: protogen
@echo "Testing bark..."
bash test.sh
@echo "bark tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
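
The protogen recipe added above shells out to grpc_tools.protoc, so each backend now rebuilds its stubs from backend.proto instead of carrying committed generated files. The same step can be driven from Python, which is handy when debugging include paths; a sketch assuming grpcio-tools is installed:

from grpc_tools import protoc

# Equivalent to the Makefile recipe: resolve backend.proto against -I../..
# and emit backend_pb2.py plus backend_pb2_grpc.py into the current dir.
ret = protoc.main([
    "grpc_tools.protoc",  # argv[0] placeholder expected by protoc.main
    "-I../..",
    "--python_out=.",
    "--grpc_python_out=.",
    "backend.proto",
])
if ret != 0:
    raise RuntimeError("protoc failed")
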
diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/bark/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
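
The _serialized_start/_serialized_end values deleted above are just byte offsets into the serialized file descriptor, which is exactly why hand-maintaining these files is fragile: every change to backend.proto shifts them. After regeneration, message metadata is better read from the descriptors at runtime; a sketch, assuming the regenerated backend_pb2 is importable:

import backend_pb2

# Inspect the PredictOptions message through its descriptor rather than
# relying on hardcoded offsets baked into the generated module.
desc = backend_pb2.PredictOptions.DESCRIPTOR
print(desc.full_name)                     # backend.PredictOptions
print([f.name for f in desc.fields][:5])  # first few declared fields
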
diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/bark/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
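
The static Backend class deleted above wraps grpcio's experimental API, which issues one-off calls without hand-building a channel. A sketch against the regenerated stubs (placeholder target; grpcio marks this API experimental, so treat the keyword arguments as subject to change):

import backend_pb2
import backend_pb2_grpc

# One-shot Health call; grpcio creates and tears down the channel
# internally. insecure=True skips channel credentials.
reply = backend_pb2_grpc.Backend.Health(
    backend_pb2.HealthMessage(),
    "localhost:50051",
    insecure=True,
    timeout=5.0,
)
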
diff --git a/backend/python/coqui/Makefile b/backend/python/coqui/Makefile
index e0ec9001..475804c9 100644
--- a/backend/python/coqui/Makefile
+++ b/backend/python/coqui/Makefile
@@ -1,15 +1,25 @@
.PHONY: coqui
-coqui:
+coqui: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running coqui..."
bash run.sh
@echo "coqui run."
.PHONY: test
-test:
+test: protogen
@echo "Testing coqui..."
bash test.sh
@echo "coqui tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
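
On the server side, the regenerated BackendServicer base raises UNIMPLEMENTED for every RPC, so a backend only overrides the methods it actually supports. A minimal sketch with a hypothetical port and an arbitrary thread-pool size:

from concurrent import futures

import grpc

import backend_pb2
import backend_pb2_grpc

class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
    # Only Health is overridden; every other RPC keeps the
    # UNIMPLEMENTED default from the generated base class.
    def Health(self, request, context):
        return backend_pb2.Reply(message=b"OK")

server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
server.add_insecure_port("localhost:50051")
server.start()
server.wait_for_termination()
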
diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/coqui/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/coqui/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile
index 40e1d1a7..c73efdd2 100644
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -12,15 +12,25 @@ export SKIP_CONDA=1
endif
.PHONY: diffusers
-diffusers:
+diffusers: protogen
@echo "Installing $(CONDA_ENV_PATH)..."
bash install.sh $(CONDA_ENV_PATH)
.PHONY: run
-run:
+run: protogen
@echo "Running diffusers..."
bash run.sh
@echo "Diffusers run."
-test:
+test: protogen
bash test.sh
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
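
[Editor's note, illustration only, not part of the patch.] The protogen rule added above is the heart of this change: instead of keeping the generated backend_pb2.py and backend_pb2_grpc.py files checked into the tree, each backend regenerates them from ../../backend.proto before install, run, or test. As a minimal sketch of what that Makefile rule does, assuming grpcio-tools is installed, the same generation step can be driven from Python:

    # protogen_sketch.py - illustration only; mirrors the Makefile rule above.
    # Regenerates backend_pb2.py and backend_pb2_grpc.py from backend.proto
    # located two directories up (the shared proto at the backend/ tree root).
    from grpc_tools import protoc

    exit_code = protoc.main([
        "grpc_tools.protoc",    # argv[0] placeholder, ignored by protoc
        "-I../..",              # import path that contains backend.proto
        "--python_out=.",       # emits backend_pb2.py here
        "--grpc_python_out=.",  # emits backend_pb2_grpc.py here
        "backend.proto",
    ])
    raise SystemExit(exit_code)

Because protogen is an ordinary file target, make only regenerates the modules when they are missing, and protogen-clean forces a fresh generation on the next build.
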
diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/diffusers/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/diffusers/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/exllama/Makefile b/backend/python/exllama/Makefile
index b51adf76..15623448 100644
--- a/backend/python/exllama/Makefile
+++ b/backend/python/exllama/Makefile
@@ -1,11 +1,21 @@
export CONDA_ENV_PATH = "exllama.yml"
.PHONY: exllama
-exllama:
+exllama: protogen
bash install.sh ${CONDA_ENV_PATH}
.PHONY: run
-run:
+run: protogen
@echo "Running exllama..."
bash run.sh
@echo "exllama run."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
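
[Editor's note, illustration only, not part of the patch.] The exllama Makefile gets the same protogen treatment as diffusers above. For reference, the regenerated modules are consumed exactly as the deleted stub code shows; a minimal client sketch, assuming `make protogen` has produced the modules and a backend server is listening on localhost:50051 (a hypothetical address):

    # client_sketch.py - illustration only; server address is assumed.
    import grpc

    import backend_pb2
    import backend_pb2_grpc

    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        # Health takes a HealthMessage and returns a Reply, whose
        # message field is bytes (see the serialized backend.proto above).
        reply = stub.Health(backend_pb2.HealthMessage())
        print(reply.message)
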
diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/exllama/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/exllama/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
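
Note: with the checked-in stubs gone, `make protogen` must run before the backend starts; the regenerated modules are then consumed as ordinary gRPC client code. A minimal sketch of exercising the Health RPC from the stub defined above (the server address is illustrative, not part of this patch):

    # Sketch of a client call against the regenerated stubs.
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        # HealthMessage carries no fields; Reply.message is bytes on the wire.
        reply = stub.Health(backend_pb2.HealthMessage())
        print(reply.message.decode("utf-8", errors="replace"))
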
diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile
index 24158151..6d6776b7 100644
--- a/backend/python/exllama2/Makefile
+++ b/backend/python/exllama2/Makefile
@@ -1,10 +1,20 @@
.PHONY: exllama2
-exllama2:
+exllama2: protogen
$(MAKE) -C ../common-env/transformers
bash install.sh
.PHONY: run
-run:
+run: protogen
@echo "Running exllama2..."
bash run.sh
@echo "exllama2 run."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/exllama2/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/exllama2/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile
index 3ff00346..ca18e609 100644
--- a/backend/python/mamba/Makefile
+++ b/backend/python/mamba/Makefile
@@ -1,16 +1,26 @@
.PHONY: mamba
-mamba:
+mamba: protogen
$(MAKE) -C ../common-env/transformers
bash install.sh
 
.PHONY: run
-run:
+run: protogen
@echo "Running mamba..."
bash run.sh
@echo "mamba run."
 
.PHONY: test
-test:
+test: protogen
@echo "Testing mamba..."
bash test.sh
- @echo "mamba tested."
\ No newline at end of file
+ @echo "mamba tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
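
The `protogen` rule above makes the `mamba`, `run`, and `test` targets depend on freshly generated `backend_pb2.py`/`backend_pb2_grpc.py` rather than on stubs checked into the tree. As a rough sketch of how the regenerated modules are consumed (the address and port below are illustrative assumptions, not part of this patch), a client dials the backend like so:

    # Hedged sketch: assumes a backend gRPC server on localhost:50051 and the
    # regenerated backend_pb2 / backend_pb2_grpc modules on sys.path.
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        reply = stub.Health(backend_pb2.HealthMessage())
        print(reply.message)  # Reply.message is declared as bytes in backend.proto
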
diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/mamba/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
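
The deleted `backend_pb2.py` embeds the whole of `backend.proto` as a serialized file descriptor, so regenerating it at build time keeps that byte blob in sync with the schema instead of relying on a hand-committed copy. A regenerated module can be sanity-checked against its descriptor pool (a sketch; the message name comes from the descriptor above):

    import backend_pb2

    # The module-level DESCRIPTOR is a FileDescriptor built from the blob above.
    md = backend_pb2.DESCRIPTOR.message_types_by_name["PredictOptions"]
    print(md.full_name, len(md.fields))  # e.g. "backend.PredictOptions" and its field count
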
diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/mamba/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
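
The `backend_pb2_grpc.py` stub deleted above is exactly what `grpc_tools.protoc` re-emits at build time: a `BackendStub` client class, a `BackendServicer` base class whose methods all default to UNIMPLEMENTED, and `add_BackendServicer_to_server` for registering overrides. A minimal server-side sketch using only names visible in the deleted stub (the worker count and port are illustrative assumptions):

    from concurrent import futures

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    class Servicer(backend_pb2_grpc.BackendServicer):
        # Override only what this backend implements; every other RPC keeps the
        # generated UNIMPLEMENTED behaviour.
        def Health(self, request, context):
            return backend_pb2.Reply(message=b"OK")

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(Servicer(), server)
    server.add_insecure_port("[::]:50051")
    server.start()
    server.wait_for_termination()
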
diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile
index aa7778e1..0ed64a07 100644
--- a/backend/python/petals/Makefile
+++ b/backend/python/petals/Makefile
@@ -1,17 +1,27 @@
.PHONY: petals
-petals:
+petals: protogen
@echo "Creating virtual environment..."
bash install.sh "petals.yml"
@echo "Virtual environment created."
 
.PHONY: run
-run:
+run: protogen
@echo "Running petals..."
bash run.sh
@echo "petals run."
 
.PHONY: test
-test:
+test: protogen
@echo "Testing petals..."
bash test.sh
@echo "petals tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
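
The `backend_pb2_grpc.py backend_pb2.py` rule shells out to `python3 -m grpc_tools.protoc`; the same step can be driven from Python, which is handy for ad-hoc regeneration. A sketch under the same assumptions as the Makefile (`grpc_tools` installed, `backend.proto` two directories up):

    # Hedged sketch: programmatic equivalent of the Makefile's protoc invocation.
    from grpc_tools import protoc

    rc = protoc.main([
        "protoc",               # argv[0] placeholder expected by protoc.main
        "-I../..",              # proto search path, mirroring the Makefile
        "--python_out=.",       # emits backend_pb2.py
        "--grpc_python_out=.",  # emits backend_pb2_grpc.py
        "backend.proto",
    ])
    if rc != 0:
        raise RuntimeError("protoc failed with exit code %d" % rc)
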
diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/petals/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/petals/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
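
For reference, the `Backend` convenience class deleted above wraps `grpc.experimental.unary_unary`/`unary_stream` so a caller can issue one-off RPCs without managing a channel. A minimal sketch of that (experimental) call path, assuming the stubs are importable and a server is listening on an illustrative localhost:50051:

    import backend_pb2
    import backend_pb2_grpc

    # One-shot Health check via the experimental helpers; the address is
    # illustrative and insecure=True skips channel credentials.
    reply = backend_pb2_grpc.Backend.Health(
        backend_pb2.HealthMessage(), "localhost:50051", insecure=True)
    print(reply.message)
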
diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile
index 7dbde5cf..ac442897 100644
--- a/backend/python/sentencetransformers/Makefile
+++ b/backend/python/sentencetransformers/Makefile
@@ -1,17 +1,27 @@
.PHONY: sentencetransformers
-sentencetransformers:
+sentencetransformers: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running sentencetransformers..."
bash run.sh
@echo "sentencetransformers run."
# Running this from the command line does not work well; it only works with an IDE like VSCode.
.PHONY: test
-test:
+test: protogen
@echo "Testing sentencetransformers..."
bash test.sh
@echo "sentencetransformers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
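
The new `protogen` target regenerates `backend_pb2.py` and `backend_pb2_grpc.py` from `backend.proto` at build time, which is why the checked-in copies are deleted below. A minimal client-side sketch of the regenerated stubs (the address is illustrative, not part of this patch):

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    # Hypothetical client: open a channel to a running backend and check health.
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        reply = stub.Health(backend_pb2.HealthMessage())
        print(reply.message)
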
diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/sentencetransformers/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/sentencetransformers/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile
index a2969d84..e28a356d 100644
--- a/backend/python/transformers-musicgen/Makefile
+++ b/backend/python/transformers-musicgen/Makefile
@@ -1,16 +1,25 @@
-
.PHONY: transformers-musicgen
-transformers-musicgen:
+transformers-musicgen: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running transformers..."
bash run.sh
@echo "transformers run."
.PHONY: test
-test:
+test: protogen
@echo "Testing transformers..."
bash test.sh
@echo "transformers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
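
The same protogen pattern is applied to this backend: the generated module defines a `BackendServicer` base class and an `add_BackendServicer_to_server` helper, which each Python backend uses to expose its service. A minimal server-side sketch under those assumptions (the worker count and port are illustrative, not part of this patch):

    from concurrent import futures
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    # Hypothetical backend that implements only the Health RPC.
    class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
        def Health(self, request, context):
            # Reply.message is declared as bytes in backend.proto.
            return backend_pb2.Reply(message=b"OK")

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
    server.add_insecure_port("[::]:50051")
    server.start()
    server.wait_for_termination()
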
diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/transformers-musicgen/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/transformers-musicgen/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
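
For reference, each deleted `backend_pb2_grpc.py` defines three things: a `BackendStub` for clients, a `BackendServicer` base class for servers, and the experimental `Backend` convenience API. A minimal client sketch against the stub might look like the following (the `localhost:50051` address and a running backend are illustrative assumptions, not part of this patch):

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    def check_health(address="localhost:50051"):
        # Health is a unary-unary RPC: one HealthMessage in, one Reply out.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            reply = stub.Health(backend_pb2.HealthMessage())
            return reply.message  # bytes payload, per backend.proto
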
diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile
index 4eeb9ad5..afe48405 100644
--- a/backend/python/transformers/Makefile
+++ b/backend/python/transformers/Makefile
@@ -1,16 +1,26 @@
.PHONY: transformers
-transformers:
+transformers: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running transformers..."
bash run.sh
@echo "transformers run."
# It does not work well from the command line; it only works with an IDE like VSCode.
.PHONY: test
-test:
+test: protogen
@echo "Testing transformers..."
bash test.sh
@echo "transformers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
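
The new `protogen` rule shells out to `python3 -m grpc_tools.protoc`; the same generation can be driven from Python, which can help when debugging the include path. A sketch under the assumption that it runs from `backend/python/transformers/`, the Makefile's working directory:

    from grpc_tools import protoc

    # Mirrors the Makefile rule: -I../.. resolves backend.proto from the
    # backend/ directory; both generated modules land in the current directory.
    ret = protoc.main([
        "grpc_tools.protoc",    # argv[0] placeholder expected by protoc.main
        "-I../..",
        "--python_out=.",       # emits backend_pb2.py
        "--grpc_python_out=.",  # emits backend_pb2_grpc.py
        "backend.proto",
    ])
    assert ret == 0, "protoc invocation failed"
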
diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/transformers/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
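
The companion `backend_pb2.py` (deleted here and likewise regenerated at build time) carries only the message classes built from the serialized descriptor above. Field names follow `backend.proto`, so a round trip looks roughly like this sketch:

    import backend_pb2

    opts = backend_pb2.PredictOptions(Prompt="Hello", Tokens=64, TopK=40)
    data = opts.SerializeToString()                      # wire-format bytes
    parsed = backend_pb2.PredictOptions.FromString(data)
    assert parsed.Prompt == "Hello"
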
diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/transformers/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
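
On the server side, the generated `add_BackendServicer_to_server` wires a servicer subclass into a `grpc.Server`; each backend overrides only the RPCs it supports, and the base class answers UNIMPLEMENTED for the rest. A minimal sketch (the port and thread count are arbitrary illustrative choices, not values from this patch):

    from concurrent import futures
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    class MyBackend(backend_pb2_grpc.BackendServicer):
        def Health(self, request, context):
            # Reply.message is bytes in backend.proto.
            return backend_pb2.Reply(message=b"OK")

    def serve(address="localhost:50051"):
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
        backend_pb2_grpc.add_BackendServicer_to_server(MyBackend(), server)
        server.add_insecure_port(address)
        server.start()
        server.wait_for_termination()
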
diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile
index 8f34f559..d7a80e55 100644
--- a/backend/python/vall-e-x/Makefile
+++ b/backend/python/vall-e-x/Makefile
@@ -3,18 +3,28 @@ export SKIP_CONDA=1
endif
.PHONY: ttsvalle
-ttsvalle:
+ttsvalle: protogen
$(MAKE) -C ../common-env/transformers
bash install.sh
.PHONY: run
-run:
+run: protogen
@echo "Running ttsvalle..."
bash run.sh
@echo "ttsvalle run."
.PHONY: test
-test:
+test: protogen
@echo "Testing valle..."
bash test.sh
@echo "valle tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
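
Note that `PredictStream` is the one server-streaming RPC in this service (registered via `unary_stream` rather than `unary_unary`), so callers iterate over a stream of `Reply` messages instead of awaiting a single response. A sketch, again assuming a backend listening on an illustrative address:

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    def stream_replies(prompt, address="localhost:50051"):
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            req = backend_pb2.PredictOptions(Prompt=prompt)
            for reply in stub.PredictStream(req):  # iterator of Reply
                yield reply.message
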
diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/vall-e-x/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/vall-e-x/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
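
Note: the generated stub above is deleted because it can now be regenerated at build time (see the `protogen` Makefile targets introduced below). For orientation, here is a minimal client sketch against the regenerated `backend_pb2`/`backend_pb2_grpc` pair; the address, port, and the choice of the Health RPC are illustrative only and not part of this patch:

```python
# A minimal client sketch against the regenerated stubs. The address and
# port below are illustrative; they are not defined by this patch.
import grpc

import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    # Health takes an empty HealthMessage and returns a Reply whose
    # `message` field is bytes.
    reply = stub.Health(backend_pb2.HealthMessage())
    print(reply.message)
```

The same stub pair is generated for every Python backend, so this calling pattern is identical across vall-e-x, vllm, and the others.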
diff --git a/backend/python/vllm/Makefile b/backend/python/vllm/Makefile
index 9ee5886d..3e1fdd77 100644
--- a/backend/python/vllm/Makefile
+++ b/backend/python/vllm/Makefile
@@ -1,15 +1,25 @@
.PHONY: vllm
-vllm:
+vllm: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
-run:
+run: protogen
@echo "Running vllm..."
bash run.sh
@echo "vllm run."
.PHONY: test
-test:
+test: protogen
@echo "Testing vllm..."
bash test.sh
- @echo "vllm tested."
\ No newline at end of file
+ @echo "vllm tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
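
The new `protogen` target invokes `grpc_tools.protoc` via `python3 -m`. For reference, the equivalent invocation from Python is sketched below, purely to make the arguments explicit; it assumes `grpcio-tools` is installed and that `backend.proto` sits two directories up, exactly as in the Makefile rule above:

```python
# A sketch of the grpc_tools.protoc invocation run by the protogen target.
from grpc_tools import protoc

rc = protoc.main([
    "grpc_tools.protoc",    # argv[0], ignored by the compiler itself
    "-I../..",              # import path containing backend.proto
    "--python_out=.",       # emits backend_pb2.py
    "--grpc_python_out=.",  # emits backend_pb2_grpc.py
    "backend.proto",
])
if rc != 0:
    raise SystemExit("protoc failed")
```

Because `backend_pb2_grpc.py` and `backend_pb2.py` are now ordinary Make prerequisites, parallel builds regenerate them exactly once before any target that depends on `protogen`.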
diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py
deleted file mode 100644
index 24b6de3b..00000000
--- a/backend/python/vllm/backend_pb2.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: backend.proto
-# Protobuf Python Version: 4.25.1
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. \x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
-if _descriptor._USE_C_DESCRIPTORS == False:
- _globals['DESCRIPTOR']._options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
- _globals['_STORESKEY']._serialized_start=26
- _globals['_STORESKEY']._serialized_end=53
- _globals['_STORESVALUE']._serialized_start=55
- _globals['_STORESVALUE']._serialized_end=83
- _globals['_STORESSETOPTIONS']._serialized_start=85
- _globals['_STORESSETOPTIONS']._serialized_end=175
- _globals['_STORESDELETEOPTIONS']._serialized_start=177
- _globals['_STORESDELETEOPTIONS']._serialized_end=232
- _globals['_STORESGETOPTIONS']._serialized_start=234
- _globals['_STORESGETOPTIONS']._serialized_end=286
- _globals['_STORESGETRESULT']._serialized_start=288
- _globals['_STORESGETRESULT']._serialized_end=377
- _globals['_STORESFINDOPTIONS']._serialized_start=379
- _globals['_STORESFINDOPTIONS']._serialized_end=445
- _globals['_STORESFINDRESULT']._serialized_start=447
- _globals['_STORESFINDRESULT']._serialized_end=559
- _globals['_HEALTHMESSAGE']._serialized_start=561
- _globals['_HEALTHMESSAGE']._serialized_end=576
- _globals['_PREDICTOPTIONS']._serialized_start=579
- _globals['_PREDICTOPTIONS']._serialized_end=1451
- _globals['_REPLY']._serialized_start=1453
- _globals['_REPLY']._serialized_end=1477
- _globals['_MODELOPTIONS']._serialized_start=1480
- _globals['_MODELOPTIONS']._serialized_end=2552
- _globals['_RESULT']._serialized_start=2554
- _globals['_RESULT']._serialized_end=2596
- _globals['_EMBEDDINGRESULT']._serialized_start=2598
- _globals['_EMBEDDINGRESULT']._serialized_end=2635
- _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
- _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
- _globals['_TRANSCRIPTRESULT']._serialized_start=2706
- _globals['_TRANSCRIPTRESULT']._serialized_end=2784
- _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
- _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
- _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
- _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
- _globals['_TTSREQUEST']._serialized_start=3095
- _globals['_TTSREQUEST']._serialized_end=3164
- _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
- _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
- _globals['_MEMORYUSAGEDATA']._serialized_start=3223
- _globals['_MEMORYUSAGEDATA']._serialized_end=3365
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
- _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
- _globals['_STATUSRESPONSE']._serialized_start=3368
- _globals['_STATUSRESPONSE']._serialized_end=3541
- _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
- _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
- _globals['_MESSAGE']._serialized_start=3543
- _globals['_MESSAGE']._serialized_end=3583
- _globals['_BACKEND']._serialized_start=3586
- _globals['_BACKEND']._serialized_end=4477
-# @@protoc_insertion_point(module_scope)
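
Once `make protogen` has regenerated the module deleted above, it behaves like any other protobuf module. A quick illustrative check, using field names taken straight from `backend.proto`:

```python
# Inspect the regenerated messages; Prompt, TopK, and Temperature are
# actual PredictOptions fields, the values are arbitrary.
import backend_pb2

opts = backend_pb2.PredictOptions(Prompt="hello", TopK=40, Temperature=0.7)
print(opts.Prompt, opts.TopK, opts.Temperature)
```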
diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py
deleted file mode 100644
index e06fccf3..00000000
--- a/backend/python/vllm/backend_pb2_grpc.py
+++ /dev/null
@@ -1,495 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-
-import backend_pb2 as backend__pb2
-
-
-class BackendStub(object):
- """Missing associated documentation comment in .proto file."""
-
- def __init__(self, channel):
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/backend.Backend/Health',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Predict = channel.unary_unary(
- '/backend.Backend/Predict',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.LoadModel = channel.unary_unary(
- '/backend.Backend/LoadModel',
- request_serializer=backend__pb2.ModelOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.PredictStream = channel.unary_stream(
- '/backend.Backend/PredictStream',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.Reply.FromString,
- )
- self.Embedding = channel.unary_unary(
- '/backend.Backend/Embedding',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.EmbeddingResult.FromString,
- )
- self.GenerateImage = channel.unary_unary(
- '/backend.Backend/GenerateImage',
- request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.AudioTranscription = channel.unary_unary(
- '/backend.Backend/AudioTranscription',
- request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
- response_deserializer=backend__pb2.TranscriptResult.FromString,
- )
- self.TTS = channel.unary_unary(
- '/backend.Backend/TTS',
- request_serializer=backend__pb2.TTSRequest.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.TokenizeString = channel.unary_unary(
- '/backend.Backend/TokenizeString',
- request_serializer=backend__pb2.PredictOptions.SerializeToString,
- response_deserializer=backend__pb2.TokenizationResponse.FromString,
- )
- self.Status = channel.unary_unary(
- '/backend.Backend/Status',
- request_serializer=backend__pb2.HealthMessage.SerializeToString,
- response_deserializer=backend__pb2.StatusResponse.FromString,
- )
- self.StoresSet = channel.unary_unary(
- '/backend.Backend/StoresSet',
- request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresDelete = channel.unary_unary(
- '/backend.Backend/StoresDelete',
- request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
- response_deserializer=backend__pb2.Result.FromString,
- )
- self.StoresGet = channel.unary_unary(
- '/backend.Backend/StoresGet',
- request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresGetResult.FromString,
- )
- self.StoresFind = channel.unary_unary(
- '/backend.Backend/StoresFind',
- request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
- response_deserializer=backend__pb2.StoresFindResult.FromString,
- )
-
-
-class BackendServicer(object):
- """Missing associated documentation comment in .proto file."""
-
- def Health(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Predict(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def LoadModel(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PredictStream(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Embedding(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateImage(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def AudioTranscription(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TTS(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TokenizeString(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def Status(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresSet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresDelete(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresGet(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def StoresFind(self, request, context):
- """Missing associated documentation comment in .proto file."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_BackendServicer_to_server(servicer, server):
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Predict': grpc.unary_unary_rpc_method_handler(
- servicer.Predict,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'LoadModel': grpc.unary_unary_rpc_method_handler(
- servicer.LoadModel,
- request_deserializer=backend__pb2.ModelOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'PredictStream': grpc.unary_stream_rpc_method_handler(
- servicer.PredictStream,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.Reply.SerializeToString,
- ),
- 'Embedding': grpc.unary_unary_rpc_method_handler(
- servicer.Embedding,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
- ),
- 'GenerateImage': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateImage,
- request_deserializer=backend__pb2.GenerateImageRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'AudioTranscription': grpc.unary_unary_rpc_method_handler(
- servicer.AudioTranscription,
- request_deserializer=backend__pb2.TranscriptRequest.FromString,
- response_serializer=backend__pb2.TranscriptResult.SerializeToString,
- ),
- 'TTS': grpc.unary_unary_rpc_method_handler(
- servicer.TTS,
- request_deserializer=backend__pb2.TTSRequest.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'TokenizeString': grpc.unary_unary_rpc_method_handler(
- servicer.TokenizeString,
- request_deserializer=backend__pb2.PredictOptions.FromString,
- response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
- ),
- 'Status': grpc.unary_unary_rpc_method_handler(
- servicer.Status,
- request_deserializer=backend__pb2.HealthMessage.FromString,
- response_serializer=backend__pb2.StatusResponse.SerializeToString,
- ),
- 'StoresSet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresSet,
- request_deserializer=backend__pb2.StoresSetOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresDelete': grpc.unary_unary_rpc_method_handler(
- servicer.StoresDelete,
- request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
- response_serializer=backend__pb2.Result.SerializeToString,
- ),
- 'StoresGet': grpc.unary_unary_rpc_method_handler(
- servicer.StoresGet,
- request_deserializer=backend__pb2.StoresGetOptions.FromString,
- response_serializer=backend__pb2.StoresGetResult.SerializeToString,
- ),
- 'StoresFind': grpc.unary_unary_rpc_method_handler(
- servicer.StoresFind,
- request_deserializer=backend__pb2.StoresFindOptions.FromString,
- response_serializer=backend__pb2.StoresFindResult.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'backend.Backend', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
-
-
- # This class is part of an EXPERIMENTAL API.
-class Backend(object):
- """Missing associated documentation comment in .proto file."""
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Predict(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def LoadModel(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
- backend__pb2.ModelOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def PredictStream(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.Reply.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Embedding(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.EmbeddingResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def GenerateImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
- backend__pb2.GenerateImageRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def AudioTranscription(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
- backend__pb2.TranscriptRequest.SerializeToString,
- backend__pb2.TranscriptResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TTS(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
- backend__pb2.TTSRequest.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def TokenizeString(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
- backend__pb2.PredictOptions.SerializeToString,
- backend__pb2.TokenizationResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def Status(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
- backend__pb2.HealthMessage.SerializeToString,
- backend__pb2.StatusResponse.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresSet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
- backend__pb2.StoresSetOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresDelete(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
- backend__pb2.StoresDeleteOptions.SerializeToString,
- backend__pb2.Result.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresGet(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
- backend__pb2.StoresGetOptions.SerializeToString,
- backend__pb2.StoresGetResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
-
- @staticmethod
- def StoresFind(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind',
- backend__pb2.StoresFindOptions.SerializeToString,
- backend__pb2.StoresFindResult.FromString,
- options, channel_credentials,
- insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
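
On the server side, each Python backend implements the regenerated `BackendServicer` and registers it with a gRPC server. A minimal sketch follows; the port and the Health-only override are illustrative, not part of this patch:

```python
# A minimal servicer sketch using the regenerated classes. Any RPC that is
# not overridden keeps the default UNIMPLEMENTED behavior.
from concurrent import futures

import grpc

import backend_pb2
import backend_pb2_grpc

class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
    def Health(self, request, context):
        # Reply.message is a bytes field in backend.proto.
        return backend_pb2.Reply(message=b"OK")

server = grpc.server(futures.ThreadPoolExecutor(max_workers=2))
backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
server.add_insecure_port("[::]:50051")
server.start()
server.wait_for_termination()
```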
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index 8ceaf1f5..a4db135e 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -36,14 +36,28 @@ To install the dependencies follow the instructions below:
Install `xcode` from the App Store
```bash
-brew install abseil cmake go grpc protobuf wget
+brew install abseil cmake go grpc protobuf protoc-gen-go protoc-gen-go-grpc python wget
+```
+
+After installing the dependencies above, install `grpcio-tools` from PyPI, either with a `pip --user` install or inside a virtualenv.
+
+```bash
+pip install --user grpcio-tools
```
{{% /tab %}}
{{% tab tabName="Debian" %}}
```bash
-apt install golang protobuf-compiler-grpc libgrpc-dev make cmake
+apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-tools
+```
+
+Once golang is installed and working, install the binaries required to compile the golang protobuf components:
+
+```bash
+go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+
```
{{% /tab %}}
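
Since the generated sources are no longer committed, a missing Python gRPC toolchain now surfaces as a `protogen` failure rather than a stale checkout. A quick sanity check before running `make protogen`; the module names are the standard ones shipped by `grpcio` and `grpcio-tools`:

```python
# Verify that the Python gRPC toolchain installed per the docs above is
# importable; exits with an ImportError naming the missing piece otherwise.
import importlib

for mod in ("grpc", "grpc_tools.protoc"):
    importlib.import_module(mod)
print("gRPC toolchain OK")
```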
diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go
deleted file mode 100644
index e9afe196..00000000
--- a/pkg/grpc/proto/backend.pb.go
+++ /dev/null
@@ -1,2934 +0,0 @@
-// Code generated by protoc-gen-go. DO NOT EDIT.
-// versions:
-// protoc-gen-go v1.26.0
-// protoc v5.26.1
-// source: backend.proto
-
-package proto
-
-import (
- protoreflect "google.golang.org/protobuf/reflect/protoreflect"
- protoimpl "google.golang.org/protobuf/runtime/protoimpl"
- reflect "reflect"
- sync "sync"
-)
-
-const (
- // Verify that this generated code is sufficiently up-to-date.
- _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
- // Verify that runtime/protoimpl is sufficiently up-to-date.
- _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
-)
-
-type StatusResponse_State int32
-
-const (
- StatusResponse_UNINITIALIZED StatusResponse_State = 0
- StatusResponse_BUSY StatusResponse_State = 1
- StatusResponse_READY StatusResponse_State = 2
- StatusResponse_ERROR StatusResponse_State = -1
-)
-
-// Enum value maps for StatusResponse_State.
-var (
- StatusResponse_State_name = map[int32]string{
- 0: "UNINITIALIZED",
- 1: "BUSY",
- 2: "READY",
- -1: "ERROR",
- }
- StatusResponse_State_value = map[string]int32{
- "UNINITIALIZED": 0,
- "BUSY": 1,
- "READY": 2,
- "ERROR": -1,
- }
-)
-
-func (x StatusResponse_State) Enum() *StatusResponse_State {
- p := new(StatusResponse_State)
- *p = x
- return p
-}
-
-func (x StatusResponse_State) String() string {
- return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
-}
-
-func (StatusResponse_State) Descriptor() protoreflect.EnumDescriptor {
- return file_backend_proto_enumTypes[0].Descriptor()
-}
-
-func (StatusResponse_State) Type() protoreflect.EnumType {
- return &file_backend_proto_enumTypes[0]
-}
-
-func (x StatusResponse_State) Number() protoreflect.EnumNumber {
- return protoreflect.EnumNumber(x)
-}
-
-// Deprecated: Use StatusResponse_State.Descriptor instead.
-func (StatusResponse_State) EnumDescriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{21, 0}
-}
-
-type StoresKey struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Floats []float32 `protobuf:"fixed32,1,rep,packed,name=Floats,proto3" json:"Floats,omitempty"`
-}
-
-func (x *StoresKey) Reset() {
- *x = StoresKey{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[0]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresKey) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresKey) ProtoMessage() {}
-
-func (x *StoresKey) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[0]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresKey.ProtoReflect.Descriptor instead.
-func (*StoresKey) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{0}
-}
-
-func (x *StoresKey) GetFloats() []float32 {
- if x != nil {
- return x.Floats
- }
- return nil
-}
-
-type StoresValue struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Bytes []byte `protobuf:"bytes,1,opt,name=Bytes,proto3" json:"Bytes,omitempty"`
-}
-
-func (x *StoresValue) Reset() {
- *x = StoresValue{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[1]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresValue) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresValue) ProtoMessage() {}
-
-func (x *StoresValue) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[1]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresValue.ProtoReflect.Descriptor instead.
-func (*StoresValue) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{1}
-}
-
-func (x *StoresValue) GetBytes() []byte {
- if x != nil {
- return x.Bytes
- }
- return nil
-}
-
-type StoresSetOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
- Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
-}
-
-func (x *StoresSetOptions) Reset() {
- *x = StoresSetOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[2]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresSetOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresSetOptions) ProtoMessage() {}
-
-func (x *StoresSetOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[2]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresSetOptions.ProtoReflect.Descriptor instead.
-func (*StoresSetOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{2}
-}
-
-func (x *StoresSetOptions) GetKeys() []*StoresKey {
- if x != nil {
- return x.Keys
- }
- return nil
-}
-
-func (x *StoresSetOptions) GetValues() []*StoresValue {
- if x != nil {
- return x.Values
- }
- return nil
-}
-
-type StoresDeleteOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
-}
-
-func (x *StoresDeleteOptions) Reset() {
- *x = StoresDeleteOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[3]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresDeleteOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresDeleteOptions) ProtoMessage() {}
-
-func (x *StoresDeleteOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[3]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresDeleteOptions.ProtoReflect.Descriptor instead.
-func (*StoresDeleteOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{3}
-}
-
-func (x *StoresDeleteOptions) GetKeys() []*StoresKey {
- if x != nil {
- return x.Keys
- }
- return nil
-}
-
-type StoresGetOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
-}
-
-func (x *StoresGetOptions) Reset() {
- *x = StoresGetOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[4]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresGetOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresGetOptions) ProtoMessage() {}
-
-func (x *StoresGetOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[4]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresGetOptions.ProtoReflect.Descriptor instead.
-func (*StoresGetOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{4}
-}
-
-func (x *StoresGetOptions) GetKeys() []*StoresKey {
- if x != nil {
- return x.Keys
- }
- return nil
-}
-
-type StoresGetResult struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
- Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
-}
-
-func (x *StoresGetResult) Reset() {
- *x = StoresGetResult{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[5]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresGetResult) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresGetResult) ProtoMessage() {}
-
-func (x *StoresGetResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[5]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresGetResult.ProtoReflect.Descriptor instead.
-func (*StoresGetResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{5}
-}
-
-func (x *StoresGetResult) GetKeys() []*StoresKey {
- if x != nil {
- return x.Keys
- }
- return nil
-}
-
-func (x *StoresGetResult) GetValues() []*StoresValue {
- if x != nil {
- return x.Values
- }
- return nil
-}
-
-type StoresFindOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Key *StoresKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"`
- TopK int32 `protobuf:"varint,2,opt,name=TopK,proto3" json:"TopK,omitempty"`
-}
-
-func (x *StoresFindOptions) Reset() {
- *x = StoresFindOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[6]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresFindOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresFindOptions) ProtoMessage() {}
-
-func (x *StoresFindOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[6]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresFindOptions.ProtoReflect.Descriptor instead.
-func (*StoresFindOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{6}
-}
-
-func (x *StoresFindOptions) GetKey() *StoresKey {
- if x != nil {
- return x.Key
- }
- return nil
-}
-
-func (x *StoresFindOptions) GetTopK() int32 {
- if x != nil {
- return x.TopK
- }
- return 0
-}
-
-type StoresFindResult struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"`
- Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"`
- Similarities []float32 `protobuf:"fixed32,3,rep,packed,name=Similarities,proto3" json:"Similarities,omitempty"`
-}
-
-func (x *StoresFindResult) Reset() {
- *x = StoresFindResult{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[7]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StoresFindResult) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StoresFindResult) ProtoMessage() {}
-
-func (x *StoresFindResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[7]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StoresFindResult.ProtoReflect.Descriptor instead.
-func (*StoresFindResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{7}
-}
-
-func (x *StoresFindResult) GetKeys() []*StoresKey {
- if x != nil {
- return x.Keys
- }
- return nil
-}
-
-func (x *StoresFindResult) GetValues() []*StoresValue {
- if x != nil {
- return x.Values
- }
- return nil
-}
-
-func (x *StoresFindResult) GetSimilarities() []float32 {
- if x != nil {
- return x.Similarities
- }
- return nil
-}
-
-type HealthMessage struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-}
-
-func (x *HealthMessage) Reset() {
- *x = HealthMessage{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[8]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *HealthMessage) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*HealthMessage) ProtoMessage() {}
-
-func (x *HealthMessage) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[8]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead.
-func (*HealthMessage) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{8}
-}
-
-// The request message containing the user's name.
-type PredictOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"`
- Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"`
- Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"`
- Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"`
- TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"`
- Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"`
- Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"`
- NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"`
- Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"`
- Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"`
- F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"`
- DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"`
- StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"`
- IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"`
- TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"`
- TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"`
- FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"`
- PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"`
- Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"`
- MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"`
- MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"`
- PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"`
- LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"`
- MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"`
- MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"`
- PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"`
- PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"`
- Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"`
- MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
- TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
- TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"`
- PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"`
- Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"`
- EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"`
- Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
- RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"`
- RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
- NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"`
- NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"`
- NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"`
- Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"`
- UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"`
- Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"`
-}
-
-func (x *PredictOptions) Reset() {
- *x = PredictOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[9]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *PredictOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*PredictOptions) ProtoMessage() {}
-
-func (x *PredictOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[9]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead.
-func (*PredictOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{9}
-}
-
-func (x *PredictOptions) GetPrompt() string {
- if x != nil {
- return x.Prompt
- }
- return ""
-}
-
-func (x *PredictOptions) GetSeed() int32 {
- if x != nil {
- return x.Seed
- }
- return 0
-}
-
-func (x *PredictOptions) GetThreads() int32 {
- if x != nil {
- return x.Threads
- }
- return 0
-}
-
-func (x *PredictOptions) GetTokens() int32 {
- if x != nil {
- return x.Tokens
- }
- return 0
-}
-
-func (x *PredictOptions) GetTopK() int32 {
- if x != nil {
- return x.TopK
- }
- return 0
-}
-
-func (x *PredictOptions) GetRepeat() int32 {
- if x != nil {
- return x.Repeat
- }
- return 0
-}
-
-func (x *PredictOptions) GetBatch() int32 {
- if x != nil {
- return x.Batch
- }
- return 0
-}
-
-func (x *PredictOptions) GetNKeep() int32 {
- if x != nil {
- return x.NKeep
- }
- return 0
-}
-
-func (x *PredictOptions) GetTemperature() float32 {
- if x != nil {
- return x.Temperature
- }
- return 0
-}
-
-func (x *PredictOptions) GetPenalty() float32 {
- if x != nil {
- return x.Penalty
- }
- return 0
-}
-
-func (x *PredictOptions) GetF16KV() bool {
- if x != nil {
- return x.F16KV
- }
- return false
-}
-
-func (x *PredictOptions) GetDebugMode() bool {
- if x != nil {
- return x.DebugMode
- }
- return false
-}
-
-func (x *PredictOptions) GetStopPrompts() []string {
- if x != nil {
- return x.StopPrompts
- }
- return nil
-}
-
-func (x *PredictOptions) GetIgnoreEOS() bool {
- if x != nil {
- return x.IgnoreEOS
- }
- return false
-}
-
-func (x *PredictOptions) GetTailFreeSamplingZ() float32 {
- if x != nil {
- return x.TailFreeSamplingZ
- }
- return 0
-}
-
-func (x *PredictOptions) GetTypicalP() float32 {
- if x != nil {
- return x.TypicalP
- }
- return 0
-}
-
-func (x *PredictOptions) GetFrequencyPenalty() float32 {
- if x != nil {
- return x.FrequencyPenalty
- }
- return 0
-}
-
-func (x *PredictOptions) GetPresencePenalty() float32 {
- if x != nil {
- return x.PresencePenalty
- }
- return 0
-}
-
-func (x *PredictOptions) GetMirostat() int32 {
- if x != nil {
- return x.Mirostat
- }
- return 0
-}
-
-func (x *PredictOptions) GetMirostatETA() float32 {
- if x != nil {
- return x.MirostatETA
- }
- return 0
-}
-
-func (x *PredictOptions) GetMirostatTAU() float32 {
- if x != nil {
- return x.MirostatTAU
- }
- return 0
-}
-
-func (x *PredictOptions) GetPenalizeNL() bool {
- if x != nil {
- return x.PenalizeNL
- }
- return false
-}
-
-func (x *PredictOptions) GetLogitBias() string {
- if x != nil {
- return x.LogitBias
- }
- return ""
-}
-
-func (x *PredictOptions) GetMLock() bool {
- if x != nil {
- return x.MLock
- }
- return false
-}
-
-func (x *PredictOptions) GetMMap() bool {
- if x != nil {
- return x.MMap
- }
- return false
-}
-
-func (x *PredictOptions) GetPromptCacheAll() bool {
- if x != nil {
- return x.PromptCacheAll
- }
- return false
-}
-
-func (x *PredictOptions) GetPromptCacheRO() bool {
- if x != nil {
- return x.PromptCacheRO
- }
- return false
-}
-
-func (x *PredictOptions) GetGrammar() string {
- if x != nil {
- return x.Grammar
- }
- return ""
-}
-
-func (x *PredictOptions) GetMainGPU() string {
- if x != nil {
- return x.MainGPU
- }
- return ""
-}
-
-func (x *PredictOptions) GetTensorSplit() string {
- if x != nil {
- return x.TensorSplit
- }
- return ""
-}
-
-func (x *PredictOptions) GetTopP() float32 {
- if x != nil {
- return x.TopP
- }
- return 0
-}
-
-func (x *PredictOptions) GetPromptCachePath() string {
- if x != nil {
- return x.PromptCachePath
- }
- return ""
-}
-
-func (x *PredictOptions) GetDebug() bool {
- if x != nil {
- return x.Debug
- }
- return false
-}
-
-func (x *PredictOptions) GetEmbeddingTokens() []int32 {
- if x != nil {
- return x.EmbeddingTokens
- }
- return nil
-}
-
-func (x *PredictOptions) GetEmbeddings() string {
- if x != nil {
- return x.Embeddings
- }
- return ""
-}
-
-func (x *PredictOptions) GetRopeFreqBase() float32 {
- if x != nil {
- return x.RopeFreqBase
- }
- return 0
-}
-
-func (x *PredictOptions) GetRopeFreqScale() float32 {
- if x != nil {
- return x.RopeFreqScale
- }
- return 0
-}
-
-func (x *PredictOptions) GetNegativePromptScale() float32 {
- if x != nil {
- return x.NegativePromptScale
- }
- return 0
-}
-
-func (x *PredictOptions) GetNegativePrompt() string {
- if x != nil {
- return x.NegativePrompt
- }
- return ""
-}
-
-func (x *PredictOptions) GetNDraft() int32 {
- if x != nil {
- return x.NDraft
- }
- return 0
-}
-
-func (x *PredictOptions) GetImages() []string {
- if x != nil {
- return x.Images
- }
- return nil
-}
-
-func (x *PredictOptions) GetUseTokenizerTemplate() bool {
- if x != nil {
- return x.UseTokenizerTemplate
- }
- return false
-}
-
-func (x *PredictOptions) GetMessages() []*Message {
- if x != nil {
- return x.Messages
- }
- return nil
-}
-
-// The response message containing the result
-type Reply struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Message []byte `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
-}
-
-func (x *Reply) Reset() {
- *x = Reply{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[10]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *Reply) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*Reply) ProtoMessage() {}
-
-func (x *Reply) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[10]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use Reply.ProtoReflect.Descriptor instead.
-func (*Reply) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{10}
-}
-
-func (x *Reply) GetMessage() []byte {
- if x != nil {
- return x.Message
- }
- return nil
-}
-
-type ModelOptions struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"`
- ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"`
- Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"`
- NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"`
- F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"`
- MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"`
- MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"`
- VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"`
- LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"`
- Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
- NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"`
- NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"`
- MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
- TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
- Threads int32 `protobuf:"varint,15,opt,name=Threads,proto3" json:"Threads,omitempty"`
- LibrarySearchPath string `protobuf:"bytes,16,opt,name=LibrarySearchPath,proto3" json:"LibrarySearchPath,omitempty"`
- RopeFreqBase float32 `protobuf:"fixed32,17,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"`
- RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
- RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"`
- NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"`
- ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"`
- // AutoGPTQ
- Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"`
- UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"`
- ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"`
- UseFastTokenizer bool `protobuf:"varint,25,opt,name=UseFastTokenizer,proto3" json:"UseFastTokenizer,omitempty"`
- // Diffusers
- PipelineType string `protobuf:"bytes,26,opt,name=PipelineType,proto3" json:"PipelineType,omitempty"`
- SchedulerType string `protobuf:"bytes,27,opt,name=SchedulerType,proto3" json:"SchedulerType,omitempty"`
- CUDA bool `protobuf:"varint,28,opt,name=CUDA,proto3" json:"CUDA,omitempty"`
- CFGScale float32 `protobuf:"fixed32,29,opt,name=CFGScale,proto3" json:"CFGScale,omitempty"`
- IMG2IMG bool `protobuf:"varint,30,opt,name=IMG2IMG,proto3" json:"IMG2IMG,omitempty"`
- CLIPModel string `protobuf:"bytes,31,opt,name=CLIPModel,proto3" json:"CLIPModel,omitempty"`
- CLIPSubfolder string `protobuf:"bytes,32,opt,name=CLIPSubfolder,proto3" json:"CLIPSubfolder,omitempty"`
- CLIPSkip int32 `protobuf:"varint,33,opt,name=CLIPSkip,proto3" json:"CLIPSkip,omitempty"`
- ControlNet string `protobuf:"bytes,48,opt,name=ControlNet,proto3" json:"ControlNet,omitempty"`
- Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"`
- // LLM (llama.cpp)
- LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"`
- LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"`
- LoraScale float32 `protobuf:"fixed32,42,opt,name=LoraScale,proto3" json:"LoraScale,omitempty"`
- NoMulMatQ bool `protobuf:"varint,37,opt,name=NoMulMatQ,proto3" json:"NoMulMatQ,omitempty"`
- DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"`
- AudioPath string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"`
- // vllm
- Quantization string `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"`
- GPUMemoryUtilization float32 `protobuf:"fixed32,50,opt,name=GPUMemoryUtilization,proto3" json:"GPUMemoryUtilization,omitempty"`
- TrustRemoteCode bool `protobuf:"varint,51,opt,name=TrustRemoteCode,proto3" json:"TrustRemoteCode,omitempty"`
- EnforceEager bool `protobuf:"varint,52,opt,name=EnforceEager,proto3" json:"EnforceEager,omitempty"`
- SwapSpace int32 `protobuf:"varint,53,opt,name=SwapSpace,proto3" json:"SwapSpace,omitempty"`
- MaxModelLen int32 `protobuf:"varint,54,opt,name=MaxModelLen,proto3" json:"MaxModelLen,omitempty"`
- MMProj string `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"`
- RopeScaling string `protobuf:"bytes,43,opt,name=RopeScaling,proto3" json:"RopeScaling,omitempty"`
- YarnExtFactor float32 `protobuf:"fixed32,44,opt,name=YarnExtFactor,proto3" json:"YarnExtFactor,omitempty"`
- YarnAttnFactor float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"`
- YarnBetaFast float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"`
- YarnBetaSlow float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"`
- Type string `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"`
-}
-
-func (x *ModelOptions) Reset() {
- *x = ModelOptions{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[11]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *ModelOptions) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*ModelOptions) ProtoMessage() {}
-
-func (x *ModelOptions) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[11]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead.
-func (*ModelOptions) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{11}
-}
-
-func (x *ModelOptions) GetModel() string {
- if x != nil {
- return x.Model
- }
- return ""
-}
-
-func (x *ModelOptions) GetContextSize() int32 {
- if x != nil {
- return x.ContextSize
- }
- return 0
-}
-
-func (x *ModelOptions) GetSeed() int32 {
- if x != nil {
- return x.Seed
- }
- return 0
-}
-
-func (x *ModelOptions) GetNBatch() int32 {
- if x != nil {
- return x.NBatch
- }
- return 0
-}
-
-func (x *ModelOptions) GetF16Memory() bool {
- if x != nil {
- return x.F16Memory
- }
- return false
-}
-
-func (x *ModelOptions) GetMLock() bool {
- if x != nil {
- return x.MLock
- }
- return false
-}
-
-func (x *ModelOptions) GetMMap() bool {
- if x != nil {
- return x.MMap
- }
- return false
-}
-
-func (x *ModelOptions) GetVocabOnly() bool {
- if x != nil {
- return x.VocabOnly
- }
- return false
-}
-
-func (x *ModelOptions) GetLowVRAM() bool {
- if x != nil {
- return x.LowVRAM
- }
- return false
-}
-
-func (x *ModelOptions) GetEmbeddings() bool {
- if x != nil {
- return x.Embeddings
- }
- return false
-}
-
-func (x *ModelOptions) GetNUMA() bool {
- if x != nil {
- return x.NUMA
- }
- return false
-}
-
-func (x *ModelOptions) GetNGPULayers() int32 {
- if x != nil {
- return x.NGPULayers
- }
- return 0
-}
-
-func (x *ModelOptions) GetMainGPU() string {
- if x != nil {
- return x.MainGPU
- }
- return ""
-}
-
-func (x *ModelOptions) GetTensorSplit() string {
- if x != nil {
- return x.TensorSplit
- }
- return ""
-}
-
-func (x *ModelOptions) GetThreads() int32 {
- if x != nil {
- return x.Threads
- }
- return 0
-}
-
-func (x *ModelOptions) GetLibrarySearchPath() string {
- if x != nil {
- return x.LibrarySearchPath
- }
- return ""
-}
-
-func (x *ModelOptions) GetRopeFreqBase() float32 {
- if x != nil {
- return x.RopeFreqBase
- }
- return 0
-}
-
-func (x *ModelOptions) GetRopeFreqScale() float32 {
- if x != nil {
- return x.RopeFreqScale
- }
- return 0
-}
-
-func (x *ModelOptions) GetRMSNormEps() float32 {
- if x != nil {
- return x.RMSNormEps
- }
- return 0
-}
-
-func (x *ModelOptions) GetNGQA() int32 {
- if x != nil {
- return x.NGQA
- }
- return 0
-}
-
-func (x *ModelOptions) GetModelFile() string {
- if x != nil {
- return x.ModelFile
- }
- return ""
-}
-
-func (x *ModelOptions) GetDevice() string {
- if x != nil {
- return x.Device
- }
- return ""
-}
-
-func (x *ModelOptions) GetUseTriton() bool {
- if x != nil {
- return x.UseTriton
- }
- return false
-}
-
-func (x *ModelOptions) GetModelBaseName() string {
- if x != nil {
- return x.ModelBaseName
- }
- return ""
-}
-
-func (x *ModelOptions) GetUseFastTokenizer() bool {
- if x != nil {
- return x.UseFastTokenizer
- }
- return false
-}
-
-func (x *ModelOptions) GetPipelineType() string {
- if x != nil {
- return x.PipelineType
- }
- return ""
-}
-
-func (x *ModelOptions) GetSchedulerType() string {
- if x != nil {
- return x.SchedulerType
- }
- return ""
-}
-
-func (x *ModelOptions) GetCUDA() bool {
- if x != nil {
- return x.CUDA
- }
- return false
-}
-
-func (x *ModelOptions) GetCFGScale() float32 {
- if x != nil {
- return x.CFGScale
- }
- return 0
-}
-
-func (x *ModelOptions) GetIMG2IMG() bool {
- if x != nil {
- return x.IMG2IMG
- }
- return false
-}
-
-func (x *ModelOptions) GetCLIPModel() string {
- if x != nil {
- return x.CLIPModel
- }
- return ""
-}
-
-func (x *ModelOptions) GetCLIPSubfolder() string {
- if x != nil {
- return x.CLIPSubfolder
- }
- return ""
-}
-
-func (x *ModelOptions) GetCLIPSkip() int32 {
- if x != nil {
- return x.CLIPSkip
- }
- return 0
-}
-
-func (x *ModelOptions) GetControlNet() string {
- if x != nil {
- return x.ControlNet
- }
- return ""
-}
-
-func (x *ModelOptions) GetTokenizer() string {
- if x != nil {
- return x.Tokenizer
- }
- return ""
-}
-
-func (x *ModelOptions) GetLoraBase() string {
- if x != nil {
- return x.LoraBase
- }
- return ""
-}
-
-func (x *ModelOptions) GetLoraAdapter() string {
- if x != nil {
- return x.LoraAdapter
- }
- return ""
-}
-
-func (x *ModelOptions) GetLoraScale() float32 {
- if x != nil {
- return x.LoraScale
- }
- return 0
-}
-
-func (x *ModelOptions) GetNoMulMatQ() bool {
- if x != nil {
- return x.NoMulMatQ
- }
- return false
-}
-
-func (x *ModelOptions) GetDraftModel() string {
- if x != nil {
- return x.DraftModel
- }
- return ""
-}
-
-func (x *ModelOptions) GetAudioPath() string {
- if x != nil {
- return x.AudioPath
- }
- return ""
-}
-
-func (x *ModelOptions) GetQuantization() string {
- if x != nil {
- return x.Quantization
- }
- return ""
-}
-
-func (x *ModelOptions) GetGPUMemoryUtilization() float32 {
- if x != nil {
- return x.GPUMemoryUtilization
- }
- return 0
-}
-
-func (x *ModelOptions) GetTrustRemoteCode() bool {
- if x != nil {
- return x.TrustRemoteCode
- }
- return false
-}
-
-func (x *ModelOptions) GetEnforceEager() bool {
- if x != nil {
- return x.EnforceEager
- }
- return false
-}
-
-func (x *ModelOptions) GetSwapSpace() int32 {
- if x != nil {
- return x.SwapSpace
- }
- return 0
-}
-
-func (x *ModelOptions) GetMaxModelLen() int32 {
- if x != nil {
- return x.MaxModelLen
- }
- return 0
-}
-
-func (x *ModelOptions) GetMMProj() string {
- if x != nil {
- return x.MMProj
- }
- return ""
-}
-
-func (x *ModelOptions) GetRopeScaling() string {
- if x != nil {
- return x.RopeScaling
- }
- return ""
-}
-
-func (x *ModelOptions) GetYarnExtFactor() float32 {
- if x != nil {
- return x.YarnExtFactor
- }
- return 0
-}
-
-func (x *ModelOptions) GetYarnAttnFactor() float32 {
- if x != nil {
- return x.YarnAttnFactor
- }
- return 0
-}
-
-func (x *ModelOptions) GetYarnBetaFast() float32 {
- if x != nil {
- return x.YarnBetaFast
- }
- return 0
-}
-
-func (x *ModelOptions) GetYarnBetaSlow() float32 {
- if x != nil {
- return x.YarnBetaSlow
- }
- return 0
-}
-
-func (x *ModelOptions) GetType() string {
- if x != nil {
- return x.Type
- }
- return ""
-}
-
-type Result struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
- Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"`
-}
-
-func (x *Result) Reset() {
- *x = Result{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[12]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *Result) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*Result) ProtoMessage() {}
-
-func (x *Result) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[12]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use Result.ProtoReflect.Descriptor instead.
-func (*Result) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{12}
-}
-
-func (x *Result) GetMessage() string {
- if x != nil {
- return x.Message
- }
- return ""
-}
-
-func (x *Result) GetSuccess() bool {
- if x != nil {
- return x.Success
- }
- return false
-}
-
-type EmbeddingResult struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Embeddings []float32 `protobuf:"fixed32,1,rep,packed,name=embeddings,proto3" json:"embeddings,omitempty"`
-}
-
-func (x *EmbeddingResult) Reset() {
- *x = EmbeddingResult{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[13]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *EmbeddingResult) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*EmbeddingResult) ProtoMessage() {}
-
-func (x *EmbeddingResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[13]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead.
-func (*EmbeddingResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{13}
-}
-
-func (x *EmbeddingResult) GetEmbeddings() []float32 {
- if x != nil {
- return x.Embeddings
- }
- return nil
-}
-
-type TranscriptRequest struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Dst string `protobuf:"bytes,2,opt,name=dst,proto3" json:"dst,omitempty"`
- Language string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"`
- Threads uint32 `protobuf:"varint,4,opt,name=threads,proto3" json:"threads,omitempty"`
-}
-
-func (x *TranscriptRequest) Reset() {
- *x = TranscriptRequest{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[14]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *TranscriptRequest) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TranscriptRequest) ProtoMessage() {}
-
-func (x *TranscriptRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[14]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use TranscriptRequest.ProtoReflect.Descriptor instead.
-func (*TranscriptRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{14}
-}
-
-func (x *TranscriptRequest) GetDst() string {
- if x != nil {
- return x.Dst
- }
- return ""
-}
-
-func (x *TranscriptRequest) GetLanguage() string {
- if x != nil {
- return x.Language
- }
- return ""
-}
-
-func (x *TranscriptRequest) GetThreads() uint32 {
- if x != nil {
- return x.Threads
- }
- return 0
-}
-
-type TranscriptResult struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Segments []*TranscriptSegment `protobuf:"bytes,1,rep,name=segments,proto3" json:"segments,omitempty"`
- Text string `protobuf:"bytes,2,opt,name=text,proto3" json:"text,omitempty"`
-}
-
-func (x *TranscriptResult) Reset() {
- *x = TranscriptResult{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[15]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *TranscriptResult) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TranscriptResult) ProtoMessage() {}
-
-func (x *TranscriptResult) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[15]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use TranscriptResult.ProtoReflect.Descriptor instead.
-func (*TranscriptResult) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{15}
-}
-
-func (x *TranscriptResult) GetSegments() []*TranscriptSegment {
- if x != nil {
- return x.Segments
- }
- return nil
-}
-
-func (x *TranscriptResult) GetText() string {
- if x != nil {
- return x.Text
- }
- return ""
-}
-
-type TranscriptSegment struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Id int32 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
- Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"`
- End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"`
- Text string `protobuf:"bytes,4,opt,name=text,proto3" json:"text,omitempty"`
- Tokens []int32 `protobuf:"varint,5,rep,packed,name=tokens,proto3" json:"tokens,omitempty"`
-}
-
-func (x *TranscriptSegment) Reset() {
- *x = TranscriptSegment{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[16]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *TranscriptSegment) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TranscriptSegment) ProtoMessage() {}
-
-func (x *TranscriptSegment) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[16]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use TranscriptSegment.ProtoReflect.Descriptor instead.
-func (*TranscriptSegment) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{16}
-}
-
-func (x *TranscriptSegment) GetId() int32 {
- if x != nil {
- return x.Id
- }
- return 0
-}
-
-func (x *TranscriptSegment) GetStart() int64 {
- if x != nil {
- return x.Start
- }
- return 0
-}
-
-func (x *TranscriptSegment) GetEnd() int64 {
- if x != nil {
- return x.End
- }
- return 0
-}
-
-func (x *TranscriptSegment) GetText() string {
- if x != nil {
- return x.Text
- }
- return ""
-}
-
-func (x *TranscriptSegment) GetTokens() []int32 {
- if x != nil {
- return x.Tokens
- }
- return nil
-}
-
-type GenerateImageRequest struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Height int32 `protobuf:"varint,1,opt,name=height,proto3" json:"height,omitempty"`
- Width int32 `protobuf:"varint,2,opt,name=width,proto3" json:"width,omitempty"`
- Mode int32 `protobuf:"varint,3,opt,name=mode,proto3" json:"mode,omitempty"`
- Step int32 `protobuf:"varint,4,opt,name=step,proto3" json:"step,omitempty"`
- Seed int32 `protobuf:"varint,5,opt,name=seed,proto3" json:"seed,omitempty"`
- PositivePrompt string `protobuf:"bytes,6,opt,name=positive_prompt,json=positivePrompt,proto3" json:"positive_prompt,omitempty"`
- NegativePrompt string `protobuf:"bytes,7,opt,name=negative_prompt,json=negativePrompt,proto3" json:"negative_prompt,omitempty"`
- Dst string `protobuf:"bytes,8,opt,name=dst,proto3" json:"dst,omitempty"`
- Src string `protobuf:"bytes,9,opt,name=src,proto3" json:"src,omitempty"`
- // Diffusers
- EnableParameters string `protobuf:"bytes,10,opt,name=EnableParameters,proto3" json:"EnableParameters,omitempty"`
- CLIPSkip int32 `protobuf:"varint,11,opt,name=CLIPSkip,proto3" json:"CLIPSkip,omitempty"`
-}
-
-func (x *GenerateImageRequest) Reset() {
- *x = GenerateImageRequest{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[17]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *GenerateImageRequest) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*GenerateImageRequest) ProtoMessage() {}
-
-func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[17]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use GenerateImageRequest.ProtoReflect.Descriptor instead.
-func (*GenerateImageRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{17}
-}
-
-func (x *GenerateImageRequest) GetHeight() int32 {
- if x != nil {
- return x.Height
- }
- return 0
-}
-
-func (x *GenerateImageRequest) GetWidth() int32 {
- if x != nil {
- return x.Width
- }
- return 0
-}
-
-func (x *GenerateImageRequest) GetMode() int32 {
- if x != nil {
- return x.Mode
- }
- return 0
-}
-
-func (x *GenerateImageRequest) GetStep() int32 {
- if x != nil {
- return x.Step
- }
- return 0
-}
-
-func (x *GenerateImageRequest) GetSeed() int32 {
- if x != nil {
- return x.Seed
- }
- return 0
-}
-
-func (x *GenerateImageRequest) GetPositivePrompt() string {
- if x != nil {
- return x.PositivePrompt
- }
- return ""
-}
-
-func (x *GenerateImageRequest) GetNegativePrompt() string {
- if x != nil {
- return x.NegativePrompt
- }
- return ""
-}
-
-func (x *GenerateImageRequest) GetDst() string {
- if x != nil {
- return x.Dst
- }
- return ""
-}
-
-func (x *GenerateImageRequest) GetSrc() string {
- if x != nil {
- return x.Src
- }
- return ""
-}
-
-func (x *GenerateImageRequest) GetEnableParameters() string {
- if x != nil {
- return x.EnableParameters
- }
- return ""
-}
-
-func (x *GenerateImageRequest) GetCLIPSkip() int32 {
- if x != nil {
- return x.CLIPSkip
- }
- return 0
-}
-
-type TTSRequest struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"`
- Model string `protobuf:"bytes,2,opt,name=model,proto3" json:"model,omitempty"`
- Dst string `protobuf:"bytes,3,opt,name=dst,proto3" json:"dst,omitempty"`
- Voice string `protobuf:"bytes,4,opt,name=voice,proto3" json:"voice,omitempty"`
-}
-
-func (x *TTSRequest) Reset() {
- *x = TTSRequest{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[18]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *TTSRequest) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TTSRequest) ProtoMessage() {}
-
-func (x *TTSRequest) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[18]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use TTSRequest.ProtoReflect.Descriptor instead.
-func (*TTSRequest) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{18}
-}
-
-func (x *TTSRequest) GetText() string {
- if x != nil {
- return x.Text
- }
- return ""
-}
-
-func (x *TTSRequest) GetModel() string {
- if x != nil {
- return x.Model
- }
- return ""
-}
-
-func (x *TTSRequest) GetDst() string {
- if x != nil {
- return x.Dst
- }
- return ""
-}
-
-func (x *TTSRequest) GetVoice() string {
- if x != nil {
- return x.Voice
- }
- return ""
-}
-
-type TokenizationResponse struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Length int32 `protobuf:"varint,1,opt,name=length,proto3" json:"length,omitempty"`
- Tokens []int32 `protobuf:"varint,2,rep,packed,name=tokens,proto3" json:"tokens,omitempty"`
-}
-
-func (x *TokenizationResponse) Reset() {
- *x = TokenizationResponse{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[19]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *TokenizationResponse) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*TokenizationResponse) ProtoMessage() {}
-
-func (x *TokenizationResponse) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[19]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use TokenizationResponse.ProtoReflect.Descriptor instead.
-func (*TokenizationResponse) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{19}
-}
-
-func (x *TokenizationResponse) GetLength() int32 {
- if x != nil {
- return x.Length
- }
- return 0
-}
-
-func (x *TokenizationResponse) GetTokens() []int32 {
- if x != nil {
- return x.Tokens
- }
- return nil
-}
-
-type MemoryUsageData struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Total uint64 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"`
- Breakdown map[string]uint64 `protobuf:"bytes,2,rep,name=breakdown,proto3" json:"breakdown,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"`
-}
-
-func (x *MemoryUsageData) Reset() {
- *x = MemoryUsageData{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[20]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *MemoryUsageData) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*MemoryUsageData) ProtoMessage() {}
-
-func (x *MemoryUsageData) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[20]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use MemoryUsageData.ProtoReflect.Descriptor instead.
-func (*MemoryUsageData) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{20}
-}
-
-func (x *MemoryUsageData) GetTotal() uint64 {
- if x != nil {
- return x.Total
- }
- return 0
-}
-
-func (x *MemoryUsageData) GetBreakdown() map[string]uint64 {
- if x != nil {
- return x.Breakdown
- }
- return nil
-}
-
-type StatusResponse struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- State StatusResponse_State `protobuf:"varint,1,opt,name=state,proto3,enum=backend.StatusResponse_State" json:"state,omitempty"`
- Memory *MemoryUsageData `protobuf:"bytes,2,opt,name=memory,proto3" json:"memory,omitempty"`
-}
-
-func (x *StatusResponse) Reset() {
- *x = StatusResponse{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[21]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *StatusResponse) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*StatusResponse) ProtoMessage() {}
-
-func (x *StatusResponse) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[21]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use StatusResponse.ProtoReflect.Descriptor instead.
-func (*StatusResponse) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{21}
-}
-
-func (x *StatusResponse) GetState() StatusResponse_State {
- if x != nil {
- return x.State
- }
- return StatusResponse_UNINITIALIZED
-}
-
-func (x *StatusResponse) GetMemory() *MemoryUsageData {
- if x != nil {
- return x.Memory
- }
- return nil
-}
-
-type Message struct {
- state protoimpl.MessageState
- sizeCache protoimpl.SizeCache
- unknownFields protoimpl.UnknownFields
-
- Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"`
- Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
-}
-
-func (x *Message) Reset() {
- *x = Message{}
- if protoimpl.UnsafeEnabled {
- mi := &file_backend_proto_msgTypes[22]
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- ms.StoreMessageInfo(mi)
- }
-}
-
-func (x *Message) String() string {
- return protoimpl.X.MessageStringOf(x)
-}
-
-func (*Message) ProtoMessage() {}
-
-func (x *Message) ProtoReflect() protoreflect.Message {
- mi := &file_backend_proto_msgTypes[22]
- if protoimpl.UnsafeEnabled && x != nil {
- ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
- if ms.LoadMessageInfo() == nil {
- ms.StoreMessageInfo(mi)
- }
- return ms
- }
- return mi.MessageOf(x)
-}
-
-// Deprecated: Use Message.ProtoReflect.Descriptor instead.
-func (*Message) Descriptor() ([]byte, []int) {
- return file_backend_proto_rawDescGZIP(), []int{22}
-}
-
-func (x *Message) GetRole() string {
- if x != nil {
- return x.Role
- }
- return ""
-}
-
-func (x *Message) GetContent() string {
- if x != nil {
- return x.Content
- }
- return ""
-}
-
-var File_backend_proto protoreflect.FileDescriptor
-
-var file_backend_proto_rawDesc = []byte{
- 0x0a, 0x0d, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12,
- 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x23, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72,
- 0x65, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x18,
- 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x22, 0x23, 0x0a,
- 0x0b, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05,
- 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x42, 0x79, 0x74,
- 0x65, 0x73, 0x22, 0x68, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f,
- 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01,
- 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53,
- 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c,
- 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14,
- 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56,
- 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0x3d, 0x0a, 0x13,
- 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69,
- 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28,
- 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72,
- 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x3a, 0x0a, 0x10, 0x53,
- 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12,
- 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65,
- 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x53, 0x74, 0x6f, 0x72, 0x65,
- 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65,
- 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65,
- 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03,
- 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
- 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73,
- 0x22, 0x4d, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70,
- 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01,
- 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
- 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x54,
- 0x6f, 0x70, 0x4b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x22,
- 0x8c, 0x01, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65,
- 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03,
- 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
- 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06,
- 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c,
- 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x53, 0x69,
- 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02,
- 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f,
- 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22,
- 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
- 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65,
- 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18,
- 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65,
- 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
- 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04,
- 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06,
- 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05,
- 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74,
- 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28,
- 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70,
- 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54,
- 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65,
- 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e,
- 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20,
- 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65,
- 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44,
- 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70,
- 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53,
- 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67,
- 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49,
- 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c,
- 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20,
- 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d,
- 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61,
- 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61,
- 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50,
- 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72,
- 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28,
- 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74,
- 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63,
- 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f,
- 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f,
- 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74,
- 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73,
- 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74,
- 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72,
- 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61,
- 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65,
- 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69,
- 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67,
- 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18,
- 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04,
- 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70,
- 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41,
- 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
- 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d,
- 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18,
- 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52,
- 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e,
- 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47,
- 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69,
- 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53,
- 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01,
- 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d,
- 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61,
- 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28,
- 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65,
- 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28,
- 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65,
- 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73,
- 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e,
- 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61,
- 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
- 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72,
- 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52,
- 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13,
- 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63,
- 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74,
- 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26,
- 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
- 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65,
- 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74,
- 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16,
- 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06,
- 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b,
- 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b,
- 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
- 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65,
- 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08,
- 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c,
- 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01,
- 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c,
- 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05,
- 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64,
- 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a,
- 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74,
- 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01,
- 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74,
- 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68,
- 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20,
- 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14,
- 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d,
- 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01,
- 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61,
- 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63,
- 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41,
- 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d,
- 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a,
- 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73,
- 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04,
- 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65,
- 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61,
- 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18,
- 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20,
- 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74,
- 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28,
- 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69,
- 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18,
- 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65,
- 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65,
- 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c,
- 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d,
- 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20,
- 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61,
- 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73,
- 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45,
- 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46,
- 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
- 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09,
- 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52,
- 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f,
- 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65,
- 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
- 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46,
- 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c,
- 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01,
- 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65,
- 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70,
- 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c,
- 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c,
- 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46,
- 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46,
- 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d,
- 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47,
- 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24,
- 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18,
- 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f,
- 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
- 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70,
- 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74,
- 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a,
- 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f,
- 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52,
- 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09,
- 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52,
- 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f,
- 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e,
- 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66,
- 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72,
- 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69,
- 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64,
- 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69,
- 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75,
- 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50,
- 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69,
- 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d,
- 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28,
- 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64,
- 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65,
- 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f,
- 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c,
- 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09,
- 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61,
- 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06,
- 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d,
- 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c,
- 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53,
- 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78,
- 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59,
- 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e,
- 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d,
- 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61,
- 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61,
- 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e,
- 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e,
- 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c,
- 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04,
- 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65,
- 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65,
- 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73,
- 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18,
- 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31,
- 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c,
- 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18,
- 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67,
- 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52,
- 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67,
- 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67,
- 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18,
- 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e,
- 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75,
- 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01,
- 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54,
- 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
- 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65,
- 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77,
- 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d,
- 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52,
- 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01,
- 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64,
- 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74,
- 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12,
- 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52,
- 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65,
- 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
- 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74,
- 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12,
- 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f,
- 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05,
- 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f,
- 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20,
- 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f,
- 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f,
- 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65,
- 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03,
- 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10,
- 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63,
- 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65,
- 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62,
- 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08,
- 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08,
- 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52,
- 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01,
- 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f,
- 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
- 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64,
- 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28,
- 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65,
- 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65,
- 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05,
- 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65,
- 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
- 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65,
- 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20,
- 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72,
- 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73,
- 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77,
- 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77,
- 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e,
- 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
- 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02,
- 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22,
- 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e,
- 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
- 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74,
- 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65,
- 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72,
- 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74,
- 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61,
- 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49,
- 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12,
- 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52,
- 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37,
- 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c,
- 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a,
- 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07,
- 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65,
- 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69,
- 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65,
- 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61,
- 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a,
- 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63,
- 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e,
- 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75,
- 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53,
- 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e,
- 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00,
- 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12,
- 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63,
- 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75,
- 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65,
- 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
- 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71,
- 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52,
- 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f,
- 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
- 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65,
- 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e,
- 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65,
- 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73,
- 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a,
- 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73,
- 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
- 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22,
- 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61,
- 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73,
- 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74,
- 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39,
- 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61,
- 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f,
- 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
- 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f,
- 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b,
- 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65,
- 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74,
- 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
- 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
- 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f,
- 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45,
- 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62,
- 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e,
- 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73,
- 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e,
- 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65,
- 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d,
- 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c,
- 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74,
- 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
-}
-
-var (
- file_backend_proto_rawDescOnce sync.Once
- file_backend_proto_rawDescData = file_backend_proto_rawDesc
-)
-
-func file_backend_proto_rawDescGZIP() []byte {
- file_backend_proto_rawDescOnce.Do(func() {
- file_backend_proto_rawDescData = protoimpl.X.CompressGZIP(file_backend_proto_rawDescData)
- })
- return file_backend_proto_rawDescData
-}
-
-var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24)
-var file_backend_proto_goTypes = []interface{}{
- (StatusResponse_State)(0), // 0: backend.StatusResponse.State
- (*StoresKey)(nil), // 1: backend.StoresKey
- (*StoresValue)(nil), // 2: backend.StoresValue
- (*StoresSetOptions)(nil), // 3: backend.StoresSetOptions
- (*StoresDeleteOptions)(nil), // 4: backend.StoresDeleteOptions
- (*StoresGetOptions)(nil), // 5: backend.StoresGetOptions
- (*StoresGetResult)(nil), // 6: backend.StoresGetResult
- (*StoresFindOptions)(nil), // 7: backend.StoresFindOptions
- (*StoresFindResult)(nil), // 8: backend.StoresFindResult
- (*HealthMessage)(nil), // 9: backend.HealthMessage
- (*PredictOptions)(nil), // 10: backend.PredictOptions
- (*Reply)(nil), // 11: backend.Reply
- (*ModelOptions)(nil), // 12: backend.ModelOptions
- (*Result)(nil), // 13: backend.Result
- (*EmbeddingResult)(nil), // 14: backend.EmbeddingResult
- (*TranscriptRequest)(nil), // 15: backend.TranscriptRequest
- (*TranscriptResult)(nil), // 16: backend.TranscriptResult
- (*TranscriptSegment)(nil), // 17: backend.TranscriptSegment
- (*GenerateImageRequest)(nil), // 18: backend.GenerateImageRequest
- (*TTSRequest)(nil), // 19: backend.TTSRequest
- (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse
- (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData
- (*StatusResponse)(nil), // 22: backend.StatusResponse
- (*Message)(nil), // 23: backend.Message
- nil, // 24: backend.MemoryUsageData.BreakdownEntry
-}
-var file_backend_proto_depIdxs = []int32{
- 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey
- 2, // 1: backend.StoresSetOptions.Values:type_name -> backend.StoresValue
- 1, // 2: backend.StoresDeleteOptions.Keys:type_name -> backend.StoresKey
- 1, // 3: backend.StoresGetOptions.Keys:type_name -> backend.StoresKey
- 1, // 4: backend.StoresGetResult.Keys:type_name -> backend.StoresKey
- 2, // 5: backend.StoresGetResult.Values:type_name -> backend.StoresValue
- 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey
- 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey
- 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue
- 23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message
- 17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment
- 24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry
- 0, // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State
- 21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData
- 9, // 14: backend.Backend.Health:input_type -> backend.HealthMessage
- 10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions
- 12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions
- 10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions
- 10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions
- 18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest
- 15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest
- 19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest
- 10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions
- 9, // 23: backend.Backend.Status:input_type -> backend.HealthMessage
- 3, // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions
- 4, // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions
- 5, // 26: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions
- 7, // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions
- 11, // 28: backend.Backend.Health:output_type -> backend.Reply
- 11, // 29: backend.Backend.Predict:output_type -> backend.Reply
- 13, // 30: backend.Backend.LoadModel:output_type -> backend.Result
- 11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply
- 14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult
- 13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result
- 16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult
- 13, // 35: backend.Backend.TTS:output_type -> backend.Result
- 20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse
- 22, // 37: backend.Backend.Status:output_type -> backend.StatusResponse
- 13, // 38: backend.Backend.StoresSet:output_type -> backend.Result
- 13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result
- 6, // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult
- 8, // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult
- 28, // [28:42] is the sub-list for method output_type
- 14, // [14:28] is the sub-list for method input_type
- 14, // [14:14] is the sub-list for extension type_name
- 14, // [14:14] is the sub-list for extension extendee
- 0, // [0:14] is the sub-list for field type_name
-}
-
-func init() { file_backend_proto_init() }
-func file_backend_proto_init() {
- if File_backend_proto != nil {
- return
- }
- if !protoimpl.UnsafeEnabled {
- file_backend_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresKey); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresValue); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresSetOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresDeleteOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresGetOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresGetResult); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresFindOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StoresFindResult); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*HealthMessage); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*PredictOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*Reply); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*ModelOptions); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*Result); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*EmbeddingResult); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptRequest); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptResult); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TranscriptSegment); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*GenerateImageRequest); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TTSRequest); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*TokenizationResponse); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*MemoryUsageData); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*StatusResponse); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} {
- switch v := v.(*Message); i {
- case 0:
- return &v.state
- case 1:
- return &v.sizeCache
- case 2:
- return &v.unknownFields
- default:
- return nil
- }
- }
- }
- type x struct{}
- out := protoimpl.TypeBuilder{
- File: protoimpl.DescBuilder{
- GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
- RawDescriptor: file_backend_proto_rawDesc,
- NumEnums: 1,
- NumMessages: 24,
- NumExtensions: 0,
- NumServices: 1,
- },
- GoTypes: file_backend_proto_goTypes,
- DependencyIndexes: file_backend_proto_depIdxs,
- EnumInfos: file_backend_proto_enumTypes,
- MessageInfos: file_backend_proto_msgTypes,
- }.Build()
- File_backend_proto = out.File
- file_backend_proto_rawDesc = nil
- file_backend_proto_goTypes = nil
- file_backend_proto_depIdxs = nil
-}
diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go
deleted file mode 100644
index a1f442e0..00000000
--- a/pkg/grpc/proto/backend_grpc.pb.go
+++ /dev/null
@@ -1,618 +0,0 @@
-// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
-// versions:
-// - protoc-gen-go-grpc v1.3.0
-// - protoc v5.26.1
-// source: backend.proto
-
-package proto
-
-import (
- context "context"
- grpc "google.golang.org/grpc"
- codes "google.golang.org/grpc/codes"
- status "google.golang.org/grpc/status"
-)
-
-// This is a compile-time assertion to ensure that this generated file
-// is compatible with the grpc package it is being compiled against.
-// Requires gRPC-Go v1.32.0 or later.
-const _ = grpc.SupportPackageIsVersion7
-
-const (
- Backend_Health_FullMethodName = "/backend.Backend/Health"
- Backend_Predict_FullMethodName = "/backend.Backend/Predict"
- Backend_LoadModel_FullMethodName = "/backend.Backend/LoadModel"
- Backend_PredictStream_FullMethodName = "/backend.Backend/PredictStream"
- Backend_Embedding_FullMethodName = "/backend.Backend/Embedding"
- Backend_GenerateImage_FullMethodName = "/backend.Backend/GenerateImage"
- Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription"
- Backend_TTS_FullMethodName = "/backend.Backend/TTS"
- Backend_TokenizeString_FullMethodName = "/backend.Backend/TokenizeString"
- Backend_Status_FullMethodName = "/backend.Backend/Status"
- Backend_StoresSet_FullMethodName = "/backend.Backend/StoresSet"
- Backend_StoresDelete_FullMethodName = "/backend.Backend/StoresDelete"
- Backend_StoresGet_FullMethodName = "/backend.Backend/StoresGet"
- Backend_StoresFind_FullMethodName = "/backend.Backend/StoresFind"
-)
-
-// BackendClient is the client API for Backend service.
-//
-// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
-type BackendClient interface {
- Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
- Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
- LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
- PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
- Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
- GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
- AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
- TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
- TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
- Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
- StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error)
- StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error)
- StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error)
- StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error)
-}
-
-type backendClient struct {
- cc grpc.ClientConnInterface
-}
-
-func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
- return &backendClient{cc}
-}
-
-func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
- out := new(Reply)
- err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
- out := new(Reply)
- err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
- stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...)
- if err != nil {
- return nil, err
- }
- x := &backendPredictStreamClient{stream}
- if err := x.ClientStream.SendMsg(in); err != nil {
- return nil, err
- }
- if err := x.ClientStream.CloseSend(); err != nil {
- return nil, err
- }
- return x, nil
-}
-
-type Backend_PredictStreamClient interface {
- Recv() (*Reply, error)
- grpc.ClientStream
-}
-
-type backendPredictStreamClient struct {
- grpc.ClientStream
-}
-
-func (x *backendPredictStreamClient) Recv() (*Reply, error) {
- m := new(Reply)
- if err := x.ClientStream.RecvMsg(m); err != nil {
- return nil, err
- }
- return m, nil
-}
-
-func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
- out := new(EmbeddingResult)
- err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
- out := new(TranscriptResult)
- err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
- out := new(TokenizationResponse)
- err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
- out := new(StatusResponse)
- err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) {
- out := new(Result)
- err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) {
- out := new(StoresGetResult)
- err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) {
- out := new(StoresFindResult)
- err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...)
- if err != nil {
- return nil, err
- }
- return out, nil
-}
-
-// BackendServer is the server API for Backend service.
-// All implementations must embed UnimplementedBackendServer
-// for forward compatibility
-type BackendServer interface {
- Health(context.Context, *HealthMessage) (*Reply, error)
- Predict(context.Context, *PredictOptions) (*Reply, error)
- LoadModel(context.Context, *ModelOptions) (*Result, error)
- PredictStream(*PredictOptions, Backend_PredictStreamServer) error
- Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
- GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
- AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
- TTS(context.Context, *TTSRequest) (*Result, error)
- TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
- Status(context.Context, *HealthMessage) (*StatusResponse, error)
- StoresSet(context.Context, *StoresSetOptions) (*Result, error)
- StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error)
- StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error)
- StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error)
- mustEmbedUnimplementedBackendServer()
-}
-
-// UnimplementedBackendServer must be embedded to have forward compatible implementations.
-type UnimplementedBackendServer struct {
-}
-
-func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
-}
-func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
-}
-func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
-}
-func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
- return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
-}
-func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
-}
-func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
-}
-func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
-}
-func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
-}
-func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
- return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
-}
-func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
- return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
-}
-func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented")
-}
-func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) {
- return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not implemented")
-}
-func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented")
-}
-func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) {
- return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented")
-}
-func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
-
-// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
-// Use of this interface is not recommended, as added methods to BackendServer will
-// result in compilation errors.
-type UnsafeBackendServer interface {
- mustEmbedUnimplementedBackendServer()
-}
-
-func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
- s.RegisterService(&Backend_ServiceDesc, srv)
-}
-
-func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(HealthMessage)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Health(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_Health_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Predict(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_Predict_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(ModelOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).LoadModel(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_LoadModel_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
- m := new(PredictOptions)
- if err := stream.RecvMsg(m); err != nil {
- return err
- }
- return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
-}
-
-type Backend_PredictStreamServer interface {
- Send(*Reply) error
- grpc.ServerStream
-}
-
-type backendPredictStreamServer struct {
- grpc.ServerStream
-}
-
-func (x *backendPredictStreamServer) Send(m *Reply) error {
- return x.ServerStream.SendMsg(m)
-}
-
-func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Embedding(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_Embedding_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(GenerateImageRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).GenerateImage(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_GenerateImage_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(TranscriptRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).AudioTranscription(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_AudioTranscription_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(TTSRequest)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).TTS(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_TTS_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(PredictOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).TokenizeString(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_TokenizeString_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(HealthMessage)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).Status(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_Status_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(StoresSetOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).StoresSet(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_StoresSet_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(StoresDeleteOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).StoresDelete(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_StoresDelete_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(StoresGetOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).StoresGet(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_StoresGet_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
- in := new(StoresFindOptions)
- if err := dec(in); err != nil {
- return nil, err
- }
- if interceptor == nil {
- return srv.(BackendServer).StoresFind(ctx, in)
- }
- info := &grpc.UnaryServerInfo{
- Server: srv,
- FullMethod: Backend_StoresFind_FullMethodName,
- }
- handler := func(ctx context.Context, req interface{}) (interface{}, error) {
- return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions))
- }
- return interceptor(ctx, in, info, handler)
-}
-
-// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
-// It's only intended for direct use with grpc.RegisterService,
-// and not to be introspected or modified (even as a copy)
-var Backend_ServiceDesc = grpc.ServiceDesc{
- ServiceName: "backend.Backend",
- HandlerType: (*BackendServer)(nil),
- Methods: []grpc.MethodDesc{
- {
- MethodName: "Health",
- Handler: _Backend_Health_Handler,
- },
- {
- MethodName: "Predict",
- Handler: _Backend_Predict_Handler,
- },
- {
- MethodName: "LoadModel",
- Handler: _Backend_LoadModel_Handler,
- },
- {
- MethodName: "Embedding",
- Handler: _Backend_Embedding_Handler,
- },
- {
- MethodName: "GenerateImage",
- Handler: _Backend_GenerateImage_Handler,
- },
- {
- MethodName: "AudioTranscription",
- Handler: _Backend_AudioTranscription_Handler,
- },
- {
- MethodName: "TTS",
- Handler: _Backend_TTS_Handler,
- },
- {
- MethodName: "TokenizeString",
- Handler: _Backend_TokenizeString_Handler,
- },
- {
- MethodName: "Status",
- Handler: _Backend_Status_Handler,
- },
- {
- MethodName: "StoresSet",
- Handler: _Backend_StoresSet_Handler,
- },
- {
- MethodName: "StoresDelete",
- Handler: _Backend_StoresDelete_Handler,
- },
- {
- MethodName: "StoresGet",
- Handler: _Backend_StoresGet_Handler,
- },
- {
- MethodName: "StoresFind",
- Handler: _Backend_StoresFind_Handler,
- },
- },
- Streams: []grpc.StreamDesc{
- {
- StreamName: "PredictStream",
- Handler: _Backend_PredictStream_Handler,
- ServerStreams: true,
- },
- },
- Metadata: "backend.proto",
-}
From eed5706994a3e770a0194cad9d1cfd724ba1b10a Mon Sep 17 00:00:00 2001
From: Dave
Date: Sat, 13 Apr 2024 03:45:34 -0400
Subject: [PATCH 0146/2750] refactor: backend/service split, channel-based llm
flow (#1963)
Refactor: channel-based llm flow and services split
---------
Signed-off-by: Dave Lee
---
.github/workflows/test.yml | 15 +-
Makefile | 18 +-
backend/go/transcribe/transcript.go | 6 +-
backend/go/transcribe/whisper.go | 2 +-
core/backend/embeddings.go | 90 +-
core/backend/image.go | 261 +++++-
core/backend/llm.go | 271 ++++--
core/backend/options.go | 84 +-
core/backend/transcript.go | 41 +-
core/backend/tts.go | 77 +-
core/cli/run.go | 8 +-
core/cli/transcript.go | 19 +-
core/cli/tts.go | 26 +-
core/config/backend_config.go | 301 +------
core/config/backend_config_loader.go | 509 +++++++++++
core/config/exports_test.go | 6 +
core/http/api.go | 209 +++--
core/http/api_test.go | 98 ++-
core/http/ctx/fiber.go | 65 +-
core/http/endpoints/elevenlabs/tts.go | 39 +-
.../http/endpoints/localai/backend_monitor.go | 4 +-
core/http/endpoints/localai/tts.go | 39 +-
core/http/endpoints/openai/assistant.go | 2 +-
core/http/endpoints/openai/chat.go | 621 ++------------
core/http/endpoints/openai/completion.go | 163 +---
core/http/endpoints/openai/edit.go | 78 +-
core/http/endpoints/openai/embeddings.go | 65 +-
core/http/endpoints/openai/image.go | 218 +----
core/http/endpoints/openai/inference.go | 55 --
core/http/endpoints/openai/list.go | 52 +-
core/http/endpoints/openai/request.go | 285 -------
core/http/endpoints/openai/transcription.go | 28 +-
core/schema/{whisper.go => transcription.go} | 2 +-
core/services/backend_monitor.go | 30 +-
core/services/gallery.go | 116 ++-
core/services/list_models.go | 72 ++
.../services}/model_preload_test.go | 5 +-
core/services/openai.go | 805 ++++++++++++++++++
core/startup/startup.go | 91 +-
core/state.go | 41 +
.../llm text/-completions Stream.bru | 25 +
pkg/concurrency/concurrency.go | 135 +++
pkg/concurrency/concurrency_test.go | 101 +++
pkg/concurrency/types.go | 6 +
pkg/grpc/backend.go | 2 +-
pkg/grpc/base/base.go | 4 +-
pkg/grpc/client.go | 4 +-
pkg/grpc/embed.go | 4 +-
pkg/grpc/interface.go | 2 +-
pkg/model/initializers.go | 8 +-
pkg/startup/model_preload.go | 85 --
pkg/utils/base64.go | 50 ++
52 files changed, 3064 insertions(+), 2279 deletions(-)
create mode 100644 core/config/backend_config_loader.go
create mode 100644 core/config/exports_test.go
delete mode 100644 core/http/endpoints/openai/inference.go
delete mode 100644 core/http/endpoints/openai/request.go
rename core/schema/{whisper.go => transcription.go} (90%)
create mode 100644 core/services/list_models.go
rename {pkg/startup => core/services}/model_preload_test.go (96%)
create mode 100644 core/services/openai.go
create mode 100644 core/state.go
create mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
create mode 100644 pkg/concurrency/concurrency.go
create mode 100644 pkg/concurrency/concurrency_test.go
create mode 100644 pkg/concurrency/types.go
delete mode 100644 pkg/startup/model_preload.go
create mode 100644 pkg/utils/base64.go
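Note on the flow this patch introduces: every service below returns a receive-only channel of concurrency.ErrorOr[...] values. The new pkg/concurrency files are created by this patch but not excerpted here, so the following is a sketch inferred from the call sites (e.g. concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}), not the committed implementation; treat the type shape and the ProduceOne helper as assumptions.

package concurrency

// ErrorOr is a value-or-error envelope; call sites in this patch populate
// exactly one of the two fields per message.
type ErrorOr[T any] struct {
	Value T
	Error error
}

// ProduceOne shows the send-then-close idiom the services below follow:
// do the work in a goroutine, send exactly one result (or error), then
// close the channel so a bare `r := <-ch` receive completes exactly once.
func ProduceOne[T any](work func() (T, error)) <-chan ErrorOr[T] {
	ch := make(chan ErrorOr[T])
	go func() {
		defer close(ch)
		v, err := work()
		if err != nil {
			ch <- ErrorOr[T]{Error: err}
			return
		}
		ch <- ErrorOr[T]{Value: v}
	}()
	return ch
}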
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 46c4e065..29bd3e08 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,8 +121,9 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
+ uses: dave-gray101/action-tmate@master
+ with:
+ connect-timeout-seconds: 180
tests-aio-container:
runs-on: ubuntu-latest
@@ -173,8 +174,9 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
+ uses: dave-gray101/action-tmate@master
+ with:
+ connect-timeout-seconds: 180
tests-apple:
runs-on: macOS-14
@@ -207,5 +209,6 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
\ No newline at end of file
+ uses: dave-gray101/action-tmate@master
+ with:
+ connect-timeout-seconds: 180
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 5932dfb2..9f86ef23 100644
--- a/Makefile
+++ b/Makefile
@@ -301,6 +301,9 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets
+halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually
+ ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {}
+
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -365,13 +368,13 @@ run-e2e-image:
run-e2e-aio:
@echo 'Running e2e AIO tests'
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
teardown-e2e:
rm -rf $(TEST_DIR) || true
@@ -379,15 +382,15 @@ teardown-e2e:
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
@@ -636,7 +639,10 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
-
+# EXPERIMENTAL:
+ifeq ($(BUILD_TYPE),metal)
+ cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/
+endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
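Note on the Makefile hunks above: the hardcoded `--flake-attempts 5` values become `$(TEST_FLAKES)`, making the retry count a single overridable knob. The variable's default is set elsewhere in the Makefile and is not visible in these hunks, so its value is an assumption here; with ordinary Make semantics, an invocation such as `make test-llama TEST_FLAKES=1` runs the suite without retrying flaky specs.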
diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go
index fdfaa974..b38d5b9f 100644
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) {
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
- command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
+ command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := runCommand(command)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
@@ -29,8 +29,8 @@ func audioToWav(src, dst string) error {
return nil
}
-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
- res := schema.Result{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
+ res := schema.TranscriptionResult{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go
index ac93be01..a9a62d24 100644
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
return err
}
-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go
index 03ff90b9..2c63dedc 100644
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -2,14 +2,100 @@ package backend
import (
"fmt"
+ "time"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/google/uuid"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
)
-func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
+type EmbeddingsBackendService struct {
+ ml *model.ModelLoader
+ bcl *config.BackendConfigLoader
+ appConfig *config.ApplicationConfig
+}
+
+func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService {
+ return &EmbeddingsBackendService{
+ ml: ml,
+ bcl: bcl,
+ appConfig: appConfig,
+ }
+}
+
+func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
+
+ resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ go func(request *schema.OpenAIRequest) {
+ if request.Model == "" {
+ request.Model = model.StableDiffusionBackend
+ }
+
+ bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ items := []schema.Item{}
+
+ for i, s := range bc.InputToken {
+ // get the model function to call for the result
+ embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ embeddings, err := embedFn()
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ }
+
+ for i, s := range bc.InputStrings {
+ // get the model function to call for the result
+ embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ embeddings, err := embedFn()
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ }
+
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Data: items,
+ Object: "list",
+ }
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
+ close(resultChannel)
+ }(request)
+ return resultChannel
+}
+
+func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model
grpcOpts := gRPCModelOpts(backendConfig)
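Usage sketch for the service above (a fragment, assuming the wiring shown in NewEmbeddingsBackendService and a surrounding function that can return an error; the model name is a placeholder): receive once from the returned channel, then branch on the Error field, mirroring how the CLI consumes the transcription service later in this patch.

	// ebs is an *EmbeddingsBackendService built via NewEmbeddingsBackendService.
	r := <-ebs.Embeddings(&schema.OpenAIRequest{
		PredictionOptions: schema.PredictionOptions{Model: "bert-embeddings"}, // placeholder model name
	})
	if r.Error != nil {
		return r.Error
	}
	for _, item := range r.Value.Data {
		fmt.Println(item.Index, len(item.Embedding))
	}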
diff --git a/core/backend/image.go b/core/backend/image.go
index b0cffb0b..affb3bb3 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -1,18 +1,252 @@
package backend
import (
- "github.com/go-skynet/LocalAI/core/config"
+ "bufio"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/google/uuid"
+ "github.com/rs/zerolog/log"
+
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
)
-func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
+type ImageGenerationBackendService struct {
+ ml *model.ModelLoader
+ bcl *config.BackendConfigLoader
+ appConfig *config.ApplicationConfig
+ BaseUrlForGeneratedImages string
+}
+
+func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService {
+ return &ImageGenerationBackendService{
+ ml: ml,
+ bcl: bcl,
+ appConfig: appConfig,
+ }
+}
+
+func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
+ resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ go func(request *schema.OpenAIRequest) {
+ bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ src := ""
+ if request.File != "" {
+
+ var fileData []byte
+ // check if request.File is a URL, if so download it and save it
+ // to a temporary file
+ if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") {
+ out, err := downloadFile(request.File)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)}
+ close(resultChannel)
+ return
+ }
+ defer os.RemoveAll(out)
+
+ fileData, err = os.ReadFile(out)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)}
+ close(resultChannel)
+ return
+ }
+
+ } else {
+ // base 64 decode the file and write it somewhere
+ // that we will cleanup
+ fileData, err = base64.StdEncoding.DecodeString(request.File)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ }
+
+ // Create a temporary file
+ outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64")
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ // write the base64 result
+ writer := bufio.NewWriter(outputFile)
+ _, err = writer.Write(fileData)
+ if err != nil {
+ outputFile.Close()
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ outputFile.Close()
+ src = outputFile.Name()
+ defer os.RemoveAll(src)
+ }
+
+ log.Debug().Msgf("Parameter Config: %+v", bc)
+
+ switch bc.Backend {
+ case "stablediffusion":
+ bc.Backend = model.StableDiffusionBackend
+ case "tinydream":
+ bc.Backend = model.TinyDreamBackend
+ case "":
+ bc.Backend = model.StableDiffusionBackend
+ if bc.Model == "" {
+ bc.Model = "stablediffusion_assets" // TODO: check?
+ }
+ }
+
+ sizeParts := strings.Split(request.Size, "x")
+ if len(sizeParts) != 2 {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
+ close(resultChannel)
+ return
+ }
+ width, err := strconv.Atoi(sizeParts[0])
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
+ close(resultChannel)
+ return
+ }
+ height, err := strconv.Atoi(sizeParts[1])
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
+ close(resultChannel)
+ return
+ }
+
+ b64JSON := false
+ if request.ResponseFormat.Type == "b64_json" {
+ b64JSON = true
+ }
+ // src and clip_skip
+ var result []schema.Item
+ for _, i := range bc.PromptStrings {
+ n := request.N
+ if request.N == 0 {
+ n = 1
+ }
+ for j := 0; j < n; j++ {
+ prompts := strings.Split(i, "|")
+ positive_prompt := prompts[0]
+ negative_prompt := ""
+ if len(prompts) > 1 {
+ negative_prompt = prompts[1]
+ }
+
+ mode := 0
+ step := bc.Step
+ if step == 0 {
+ step = 15
+ }
+
+ if request.Mode != 0 {
+ mode = request.Mode
+ }
+
+ if request.Step != 0 {
+ step = request.Step
+ }
+
+ tempDir := ""
+ if !b64JSON {
+ tempDir = igbs.appConfig.ImageDir
+ }
+ // Create a temporary file
+ outputFile, err := os.CreateTemp(tempDir, "b64")
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ outputFile.Close()
+ output := outputFile.Name() + ".png"
+ // Rename the temporary file
+ err = os.Rename(outputFile.Name(), output)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ if request.Seed == nil {
+ zVal := 0 // Idiomatic way to do this? Actually needed?
+ request.Seed = &zVal
+ }
+
+ fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ if err := fn(); err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+
+ item := &schema.Item{}
+
+ if b64JSON {
+ defer os.RemoveAll(output)
+ data, err := os.ReadFile(output)
+ if err != nil {
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
+ close(resultChannel)
+ return
+ }
+ item.B64JSON = base64.StdEncoding.EncodeToString(data)
+ } else {
+ base := filepath.Base(output)
+ item.URL = igbs.BaseUrlForGeneratedImages + base
+ }
+
+ result = append(result, *item)
+ }
+ }
+
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Data: result,
+ }
+ resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
+ close(resultChannel)
+ }(request)
+ return resultChannel
+}
+
+func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
+
threads := backendConfig.Threads
if *threads == 0 && appConfig.Threads != 0 {
threads = &appConfig.Threads
}
+
gRPCOpts := gRPCModelOpts(backendConfig)
+
opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(backendConfig.Backend),
model.WithAssetDir(appConfig.AssetsDestination),
@@ -50,3 +284,24 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
return fn, nil
}
+
+// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change.
+func downloadFile(url string) (string, error) {
+ // Get the data
+ resp, err := http.Get(url)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ // Create the file
+ out, err := os.CreateTemp("", "image")
+ if err != nil {
+ return "", err
+ }
+ defer out.Close()
+
+ // Write the body to file
+ _, err = io.Copy(out, resp.Body)
+ return out.Name(), err
+}
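One convention in the image path above deserves a callout: each entry of bc.PromptStrings is split on "|", with the left side used as the positive prompt and the optional right side as the negative prompt. A self-contained illustration with a made-up prompt:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Mirrors the split inside GenerateImage above.
	prompts := strings.Split("a watercolor fox|blurry, low quality", "|")
	positive := prompts[0]
	negative := ""
	if len(prompts) > 1 {
		negative = prompts[1]
	}
	fmt.Printf("positive=%q negative=%q\n", positive, negative)
	// Output: positive="a watercolor fox" negative="blurry, low quality"
}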
diff --git a/core/backend/llm.go b/core/backend/llm.go
index 493dc25c..1878e87a 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -11,17 +11,22 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
+ "github.com/rs/zerolog/log"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
-type LLMResponse struct {
- Response string // should this be []byte?
- Usage TokenUsage
+type LLMRequest struct {
+ Id int // TODO Remove if not used.
+ Text string
+ Images []string
+ RawMessages []schema.Message
+ // TODO: Other Modalities?
}
type TokenUsage struct {
@@ -29,57 +34,94 @@ type TokenUsage struct {
Completion int
}
-func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
- modelFile := c.Model
- threads := c.Threads
- if *threads == 0 && o.Threads != 0 {
- threads = &o.Threads
+type LLMResponse struct {
+ Request *LLMRequest
+ Response string // should this be []byte?
+ Usage TokenUsage
+}
+
+// TODO: Does this belong here or in core/services/openai.go?
+type LLMResponseBundle struct {
+ Request *schema.OpenAIRequest
+ Response []schema.Choice
+ Usage TokenUsage
+}
+
+type LLMBackendService struct {
+ bcl *config.BackendConfigLoader
+ ml *model.ModelLoader
+ appConfig *config.ApplicationConfig
+ ftMutex sync.Mutex
+ cutstrings map[string]*regexp.Regexp
+}
+
+func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService {
+ return &LLMBackendService{
+ bcl: bcl,
+ ml: ml,
+ appConfig: appConfig,
+ ftMutex: sync.Mutex{},
+ cutstrings: make(map[string]*regexp.Regexp),
}
- grpcOpts := gRPCModelOpts(c)
+}
+
+// TODO: Should ctx param be removed and replaced with hardcoded req.Context?
+func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) (
+ resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) {
+
+ threads := bc.Threads
+ if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 {
+ threads = &llmbs.appConfig.Threads
+ }
+
+ grpcOpts := gRPCModelOpts(bc)
var inferenceModel grpc.Backend
- var err error
- opts := modelOpts(c, o, []model.Option{
+ opts := modelOpts(bc, llmbs.appConfig, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
- model.WithAssetDir(o.AssetsDestination),
- model.WithModel(modelFile),
- model.WithContext(o.Context),
+ model.WithAssetDir(llmbs.appConfig.AssetsDestination),
+ model.WithModel(bc.Model),
+ model.WithContext(llmbs.appConfig.Context),
})
- if c.Backend != "" {
- opts = append(opts, model.WithBackendString(c.Backend))
+ if bc.Backend != "" {
+ opts = append(opts, model.WithBackendString(bc.Backend))
}
- // Check if the modelFile exists, if it doesn't try to load it from the gallery
- if o.AutoloadGalleries { // experimental
- if _, err := os.Stat(modelFile); os.IsNotExist(err) {
+ // Check if bc.Model exists, if it doesn't try to load it from the gallery
+ if llmbs.appConfig.AutoloadGalleries { // experimental
+ if _, err := os.Stat(bc.Model); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
- err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+ err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
- return nil, err
+ return nil, nil, err
}
}
}
- if c.Backend == "" {
- inferenceModel, err = loader.GreedyLoader(opts...)
+ if bc.Backend == "" {
+ log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model)
+ inferenceModel, err = llmbs.ml.GreedyLoader(opts...)
} else {
- inferenceModel, err = loader.BackendLoader(opts...)
+ inferenceModel, err = llmbs.ml.BackendLoader(opts...)
}
if err != nil {
- return nil, err
+ log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend")
+ return
}
- var protoMessages []*proto.Message
- // if we are using the tokenizer template, we need to convert the messages to proto messages
- // unless the prompt has already been tokenized (non-chat endpoints + functions)
- if c.TemplateConfig.UseTokenizerTemplate && s == "" {
- protoMessages = make([]*proto.Message, len(messages), len(messages))
- for i, message := range messages {
+ grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath)
+ grpcPredOpts.Prompt = req.Text
+ grpcPredOpts.Images = req.Images
+
+ if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" {
+ grpcPredOpts.UseTokenizerTemplate = true
+ protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages))
+ for i, message := range req.RawMessages {
protoMessages[i] = &proto.Message{
Role: message.Role,
}
@@ -87,47 +129,32 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
case string:
protoMessages[i].Content = ct
default:
- return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
+ err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
+ return
}
}
}
- // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
- fn := func() (LLMResponse, error) {
- opts := gRPCPredictOpts(c, loader.ModelPath)
- opts.Prompt = s
- opts.Messages = protoMessages
- opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
- opts.Images = images
+ tokenUsage := TokenUsage{}
- tokenUsage := TokenUsage{}
+ promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts)
+ if pErr == nil && promptInfo.Length > 0 {
+ tokenUsage.Prompt = int(promptInfo.Length)
+ }
- // check the per-model feature flag for usage, since tokenCallback may have a cost.
- // Defaults to off as for now it is still experimental
- if c.FeatureFlag.Enabled("usage") {
- userTokenCallback := tokenCallback
- if userTokenCallback == nil {
- userTokenCallback = func(token string, usage TokenUsage) bool {
- return true
- }
- }
+ rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse])
+ // TODO this next line is the biggest argument for taking named return values _back_ out!!!
+ var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse]
- promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
- if pErr == nil && promptInfo.Length > 0 {
- tokenUsage.Prompt = int(promptInfo.Length)
- }
+ if enableTokenChannel {
+ rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse])
- tokenCallback = func(token string, usage TokenUsage) bool {
- tokenUsage.Completion++
- return userTokenCallback(token, tokenUsage)
- }
- }
-
- if tokenCallback != nil {
- ss := ""
+ // TODO Needs better name
+ ss := ""
+ go func() {
var partialRune []byte
- err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
+ err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) {
partialRune = append(partialRune, chars...)
for len(partialRune) > 0 {
@@ -137,48 +164,120 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
break
}
- tokenCallback(string(r), tokenUsage)
+ tokenUsage.Completion++
+ rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
+ Response: string(r),
+ Usage: tokenUsage,
+ }}
+
ss += string(r)
partialRune = partialRune[size:]
}
})
- return LLMResponse{
- Response: ss,
- Usage: tokenUsage,
- }, err
- } else {
- // TODO: Is the chicken bit the only way to get here? is that acceptable?
- reply, err := inferenceModel.Predict(ctx, opts)
+ close(rawTokenChannel)
if err != nil {
- return LLMResponse{}, err
+ rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
+ } else {
+ rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
+ Response: ss,
+ Usage: tokenUsage,
+ }}
}
- return LLMResponse{
- Response: string(reply.Message),
- Usage: tokenUsage,
- }, err
- }
+ close(rawResultChannel)
+ }()
+ } else {
+ go func() {
+ reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+ if err != nil {
+ rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
+ close(rawResultChannel)
+ } else {
+ rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
+ Response: string(reply.Message),
+ Usage: tokenUsage,
+ }}
+ close(rawResultChannel)
+ }
+ }()
}
- return fn, nil
+ resultChannel = rawResultChannel
+ tokenChannel = rawTokenChannel
+ return
}
-var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
-var mu sync.Mutex = sync.Mutex{}
+// TODO: Should predInput be a separate param still, or should this fn handle extracting it from request??
+func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig,
+ mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) (
+ // Returns:
+ resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) {
-func Finetune(config config.BackendConfig, input, prediction string) string {
+ rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle])
+ resultChannel = rawChannel
+
+ if request.N == 0 { // number of completions to return
+ request.N = 1
+ }
+ images := []string{}
+ for _, m := range request.Messages {
+ images = append(images, m.StringImages...)
+ }
+
+ for i := 0; i < request.N; i++ {
+
+ individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{
+ Text: predInput,
+ Images: images,
+ RawMessages: request.Messages,
+ }, bc, enableTokenChannels)
+ if infErr != nil {
+ err = infErr // Avoids complaints about redeclaring err but looks dumb
+ return
+ }
+ completionChannels = append(completionChannels, individualResultChannel)
+ tokenChannels = append(tokenChannels, tokenChannel)
+ }
+
+ go func() {
+ initialBundle := LLMResponseBundle{
+ Request: request,
+ Response: []schema.Choice{},
+ Usage: TokenUsage{},
+ }
+
+ wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] {
+ if iv.Error != nil {
+ ov.Error = iv.Error
+ // TODO: Decide if we should wipe partials or not?
+ return ov
+ }
+ ov.Value.Usage.Prompt += iv.Value.Usage.Prompt
+ ov.Value.Usage.Completion += iv.Value.Usage.Completion
+
+ ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value))
+ return ov
+ }, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true)
+ wg.Wait()
+
+ }()
+
+ return
+}
+
+func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
- mu.Lock()
- reg, ok := cutstrings[c]
+ llmbs.ftMutex.Lock()
+ reg, ok := llmbs.cutstrings[c]
if !ok {
- cutstrings[c] = regexp.MustCompile(c)
- reg = cutstrings[c]
+ llmbs.cutstrings[c] = regexp.MustCompile(c)
+ reg = llmbs.cutstrings[c]
}
- mu.Unlock()
+ llmbs.ftMutex.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
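GenerateText above fans out request.N Inference calls and folds their result channels into one LLMResponseBundle via concurrency.SliceOfChannelsReducer. That function lives in the new pkg/concurrency/concurrency.go, which this excerpt does not show; the sketch below is reconstructed purely from the call site, so the name suffix, signature details, and the meaning of the trailing boolean are all assumptions.

package concurrency

import "sync"

// SliceOfChannelsReducerSketch is a hypothetical reconstruction, not the
// committed code. It drains all input channels concurrently, folds each
// received value into the accumulator, emits the final accumulator on out,
// and returns a WaitGroup that completes once the inputs are drained.
func SliceOfChannelsReducerSketch[I any, O any](
	ins []<-chan I, out chan<- O,
	fold func(in I, acc O) O, initial O, closeOut bool,
) *sync.WaitGroup {
	var mu sync.Mutex
	acc := initial
	wg := &sync.WaitGroup{}
	wg.Add(len(ins))
	for _, ch := range ins {
		go func(ch <-chan I) {
			defer wg.Done()
			for v := range ch {
				mu.Lock()
				acc = fold(v, acc)
				mu.Unlock()
			}
		}(ch)
	}
	go func() {
		wg.Wait()
		out <- acc
		if closeOut {
			close(out)
		}
	}()
	return wg
}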
diff --git a/core/backend/options.go b/core/backend/options.go
index 5b303b05..0b4e56db 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -10,7 +10,7 @@ import (
model "github.com/go-skynet/LocalAI/pkg/model"
)
-func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
if so.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
}
@@ -19,12 +19,12 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode
opts = append(opts, model.EnableParallelRequests)
}
- if c.GRPC.Attempts != 0 {
- opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
+ if bc.GRPC.Attempts != 0 {
+ opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts))
}
- if c.GRPC.AttemptsSleepTime != 0 {
- opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
+ if bc.GRPC.AttemptsSleepTime != 0 {
+ opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime))
}
for k, v := range so.ExternalGRPCBackends {
@@ -34,7 +34,7 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode
return opts
}
-func getSeed(c config.BackendConfig) int32 {
+func getSeed(c *config.BackendConfig) int32 {
seed := int32(*c.Seed)
if seed == config.RAND_SEED {
seed = rand.Int31()
@@ -43,7 +43,7 @@ func getSeed(c config.BackendConfig) int32 {
return seed
}
-func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
+func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
@@ -104,47 +104,47 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
}
}
-func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
+func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions {
promptCachePath := ""
- if c.PromptCachePath != "" {
- p := filepath.Join(modelPath, c.PromptCachePath)
+ if bc.PromptCachePath != "" {
+ p := filepath.Join(modelPath, bc.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
promptCachePath = p
}
return &pb.PredictOptions{
- Temperature: float32(*c.Temperature),
- TopP: float32(*c.TopP),
- NDraft: c.NDraft,
- TopK: int32(*c.TopK),
- Tokens: int32(*c.Maxtokens),
- Threads: int32(*c.Threads),
- PromptCacheAll: c.PromptCacheAll,
- PromptCacheRO: c.PromptCacheRO,
+ Temperature: float32(*bc.Temperature),
+ TopP: float32(*bc.TopP),
+ NDraft: bc.NDraft,
+ TopK: int32(*bc.TopK),
+ Tokens: int32(*bc.Maxtokens),
+ Threads: int32(*bc.Threads),
+ PromptCacheAll: bc.PromptCacheAll,
+ PromptCacheRO: bc.PromptCacheRO,
PromptCachePath: promptCachePath,
- F16KV: *c.F16,
- DebugMode: *c.Debug,
- Grammar: c.Grammar,
- NegativePromptScale: c.NegativePromptScale,
- RopeFreqBase: c.RopeFreqBase,
- RopeFreqScale: c.RopeFreqScale,
- NegativePrompt: c.NegativePrompt,
- Mirostat: int32(*c.LLMConfig.Mirostat),
- MirostatETA: float32(*c.LLMConfig.MirostatETA),
- MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
- Debug: *c.Debug,
- StopPrompts: c.StopWords,
- Repeat: int32(c.RepeatPenalty),
- NKeep: int32(c.Keep),
- Batch: int32(c.Batch),
- IgnoreEOS: c.IgnoreEOS,
- Seed: getSeed(c),
- FrequencyPenalty: float32(c.FrequencyPenalty),
- MLock: *c.MMlock,
- MMap: *c.MMap,
- MainGPU: c.MainGPU,
- TensorSplit: c.TensorSplit,
- TailFreeSamplingZ: float32(*c.TFZ),
- TypicalP: float32(*c.TypicalP),
+ F16KV: *bc.F16,
+ DebugMode: *bc.Debug,
+ Grammar: bc.Grammar,
+ NegativePromptScale: bc.NegativePromptScale,
+ RopeFreqBase: bc.RopeFreqBase,
+ RopeFreqScale: bc.RopeFreqScale,
+ NegativePrompt: bc.NegativePrompt,
+ Mirostat: int32(*bc.LLMConfig.Mirostat),
+ MirostatETA: float32(*bc.LLMConfig.MirostatETA),
+ MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
+ Debug: *bc.Debug,
+ StopPrompts: bc.StopWords,
+ Repeat: int32(bc.RepeatPenalty),
+ NKeep: int32(bc.Keep),
+ Batch: int32(bc.Batch),
+ IgnoreEOS: bc.IgnoreEOS,
+ Seed: getSeed(bc),
+ FrequencyPenalty: float32(bc.FrequencyPenalty),
+ MLock: *bc.MMlock,
+ MMap: *bc.MMap,
+ MainGPU: bc.MainGPU,
+ TensorSplit: bc.TensorSplit,
+ TailFreeSamplingZ: float32(*bc.TFZ),
+ TypicalP: float32(*bc.TypicalP),
}
}
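A caution implicit in gRPCPredictOpts above: Temperature, TopP, TopK, Maxtokens, Threads, F16, Debug, the Mirostat settings and friends are pointer fields dereferenced unconditionally, so the function assumes SetDefaults (or equivalent) has populated them; a zero-value BackendConfig would panic here. One defensive pattern, shown purely as an illustration and not code from this patch (the 0.7 fallback is arbitrary):

package config

// derefOr returns *p, or fallback when p is nil; e.g.
// Temperature: float32(derefOr(bc.Temperature, 0.7)).
func derefOr[T any](p *T, fallback T) T {
	if p == nil {
		return fallback
	}
	return *p
}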
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 4c3859df..6761c2ac 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -7,11 +7,48 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
)
-func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
+type TranscriptionBackendService struct {
+ ml *model.ModelLoader
+ bcl *config.BackendConfigLoader
+ appConfig *config.ApplicationConfig
+}
+
+func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService {
+ return &TranscriptionBackendService{
+ ml: ml,
+ bcl: bcl,
+ appConfig: appConfig,
+ }
+}
+
+func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] {
+ responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult])
+ go func(request *schema.OpenAIRequest) {
+ bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig)
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request:%w", err)}
+ close(responseChannel)
+ return
+ }
+
+ tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig)
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err}
+ close(responseChannel)
+ return
+ }
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr}
+ close(responseChannel)
+ }(request)
+ return responseChannel
+}
+
+func modelTranscription(audio, language string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(model.WhisperBackend),
diff --git a/core/backend/tts.go b/core/backend/tts.go
index f97b6202..d1fa270d 100644
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -7,29 +7,60 @@ import (
"path/filepath"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
-func generateUniqueFileName(dir, baseName, ext string) string {
- counter := 1
- fileName := baseName + ext
+type TextToSpeechBackendService struct {
+ ml *model.ModelLoader
+ bcl *config.BackendConfigLoader
+ appConfig *config.ApplicationConfig
+}
- for {
- filePath := filepath.Join(dir, fileName)
- _, err := os.Stat(filePath)
- if os.IsNotExist(err) {
- return fileName
- }
-
- counter++
- fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
+func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService {
+ return &TextToSpeechBackendService{
+ ml: ml,
+ bcl: bcl,
+ appConfig: appConfig,
}
}
-func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
+func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] {
+ responseChannel := make(chan concurrency.ErrorOr[*string])
+ go func(request *schema.TTSRequest) {
+ cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath,
+ config.LoadOptionDebug(ttsbs.appConfig.Debug),
+ config.LoadOptionThreads(ttsbs.appConfig.Threads),
+ config.LoadOptionContextSize(ttsbs.appConfig.ContextSize),
+ config.LoadOptionF16(ttsbs.appConfig.F16),
+ )
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*string]{Error: err}
+ close(responseChannel)
+ return
+ }
+
+ if request.Backend != "" {
+ cfg.Backend = request.Backend
+ }
+
+ outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg)
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*string]{Error: err}
+ close(responseChannel)
+ return
+ }
+ responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile}
+ close(responseChannel)
+ }(request)
+ return responseChannel
+}
+
+func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
@@ -37,7 +68,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
grpcOpts := gRPCModelOpts(backendConfig)
- opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
+ opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
@@ -87,3 +118,19 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
return filePath, res, err
}
+
+func generateUniqueFileName(dir, baseName, ext string) string {
+ counter := 1
+ fileName := baseName + ext
+
+ for {
+ filePath := filepath.Join(dir, fileName)
+ _, err := os.Stat(filePath)
+ if os.IsNotExist(err) {
+ return fileName
+ }
+
+ counter++
+ fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
+ }
+}
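Worked example for generateUniqueFileName above (the directory contents are hypothetical):

// dir contains piper.wav               -> generateUniqueFileName(dir, "piper", ".wav") == "piper_2.wav"
// dir contains piper.wav, piper_2.wav  -> returns "piper_3.wav"
// The counter starts at 1 and is incremented before the first suffixed
// attempt, so "piper_1.wav" is never produced.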
diff --git a/core/cli/run.go b/core/cli/run.go
index 09d09979..c3b186c0 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error {
}
if r.PreloadBackendOnly {
- _, _, _, err := startup.Startup(opts...)
+ _, err := startup.Startup(opts...)
return err
}
- cl, ml, options, err := startup.Startup(opts...)
+ application, err := startup.Startup(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
@@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error {
// Watch the configuration directory
// If the directory does not exist, we don't watch it
if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
- closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
+ closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig)
defer closeConfigWatcherFn()
if err != nil {
@@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error {
}
}
- appHTTP, err := http.App(cl, ml, options)
+ appHTTP, err := http.App(application)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
return err
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
index 9f36a77c..f14a1a87 100644
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -7,6 +7,7 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
)
@@ -43,11 +44,21 @@ func (t *TranscriptCMD) Run(ctx *Context) error {
defer ml.StopAllGRPC()
- tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
- if err != nil {
- return err
+ tbs := backend.NewTranscriptionBackendService(ml, cl, opts)
+
+ resultChannel := tbs.Transcribe(&schema.OpenAIRequest{
+ PredictionOptions: schema.PredictionOptions{
+ Language: t.Language,
+ },
+ File: t.Filename,
+ })
+
+ r := <-resultChannel
+
+ if r.Error != nil {
+ return r.Error
}
- for _, segment := range tr.Segments {
+ for _, segment := range r.Value.Segments {
fmt.Println(segment.Start.String(), "-", segment.Text)
}
return nil
diff --git a/core/cli/tts.go b/core/cli/tts.go
index 1d8fd3a3..c7758c48 100644
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -9,6 +9,7 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
)
@@ -42,20 +43,29 @@ func (t *TTSCMD) Run(ctx *Context) error {
defer ml.StopAllGRPC()
- options := config.BackendConfig{}
- options.SetDefaults()
+ ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts)
- filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
- if err != nil {
- return err
+ request := &schema.TTSRequest{
+ Model: t.Model,
+ Input: text,
+ Backend: t.Backend,
+ Voice: t.Voice,
+ }
+
+ resultsChannel := ttsbs.TextToAudioFile(request)
+
+ rawResult := <-resultsChannel
+
+ if rawResult.Error != nil {
+ return rawResult.Error
}
if outputFile != "" {
- if err := os.Rename(filePath, outputFile); err != nil {
+ if err := os.Rename(*rawResult.Value, outputFile); err != nil {
return err
}
- fmt.Printf("Generate file %s\n", outputFile)
+ fmt.Printf("Generated file %q\n", outputFile)
} else {
- fmt.Printf("Generate file %s\n", filePath)
+ fmt.Printf("Generated file %q\n", *rawResult.Value)
}
return nil
}
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 81c92d01..47e4829d 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -1,22 +1,7 @@
package config
import (
- "errors"
- "fmt"
- "io/fs"
- "os"
- "path/filepath"
- "sort"
- "strings"
- "sync"
-
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/downloader"
- "github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/rs/zerolog/log"
- "gopkg.in/yaml.v3"
-
- "github.com/charmbracelet/glamour"
)
const (
@@ -199,7 +184,7 @@ func (c *BackendConfig) FunctionToCall() string {
}
func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
- lo := &LoadOptions{}
+ lo := &ConfigLoaderOptions{}
lo.Apply(opts...)
ctx := lo.ctxSize
@@ -312,287 +297,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.Debug = &trueV
}
}
-
-////// Config Loader ////////
-
-type BackendConfigLoader struct {
- configs map[string]BackendConfig
- sync.Mutex
-}
-
-type LoadOptions struct {
- debug bool
- threads, ctxSize int
- f16 bool
-}
-
-func LoadOptionDebug(debug bool) ConfigLoaderOption {
- return func(o *LoadOptions) {
- o.debug = debug
- }
-}
-
-func LoadOptionThreads(threads int) ConfigLoaderOption {
- return func(o *LoadOptions) {
- o.threads = threads
- }
-}
-
-func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
- return func(o *LoadOptions) {
- o.ctxSize = ctxSize
- }
-}
-
-func LoadOptionF16(f16 bool) ConfigLoaderOption {
- return func(o *LoadOptions) {
- o.f16 = f16
- }
-}
-
-type ConfigLoaderOption func(*LoadOptions)
-
-func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
- for _, l := range options {
- l(lo)
- }
-}
-
-// Load a config file for a model
-func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-
- // Load a config file if present after the model name
- cfg := &BackendConfig{
- PredictionOptions: schema.PredictionOptions{
- Model: modelName,
- },
- }
-
- cfgExisting, exists := cl.GetBackendConfig(modelName)
- if exists {
- cfg = &cfgExisting
- } else {
- // Try loading a model config file
- modelConfig := filepath.Join(modelPath, modelName+".yaml")
- if _, err := os.Stat(modelConfig); err == nil {
- if err := cl.LoadBackendConfig(
- modelConfig, opts...,
- ); err != nil {
- return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
- }
- cfgExisting, exists = cl.GetBackendConfig(modelName)
- if exists {
- cfg = &cfgExisting
- }
- }
- }
-
- cfg.SetDefaults(opts...)
-
- return cfg, nil
-}
-
-func NewBackendConfigLoader() *BackendConfigLoader {
- return &BackendConfigLoader{
- configs: make(map[string]BackendConfig),
- }
-}
-func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
- c := &[]*BackendConfig{}
- f, err := os.ReadFile(file)
- if err != nil {
- return nil, fmt.Errorf("cannot read config file: %w", err)
- }
- if err := yaml.Unmarshal(f, c); err != nil {
- return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
- }
-
- for _, cc := range *c {
- cc.SetDefaults(opts...)
- }
-
- return *c, nil
-}
-
-func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
- lo := &LoadOptions{}
- lo.Apply(opts...)
-
- c := &BackendConfig{}
- f, err := os.ReadFile(file)
- if err != nil {
- return nil, fmt.Errorf("cannot read config file: %w", err)
- }
- if err := yaml.Unmarshal(f, c); err != nil {
- return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
- }
-
- c.SetDefaults(opts...)
- return c, nil
-}
-
-func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
- cm.Lock()
- defer cm.Unlock()
- c, err := ReadBackendConfigFile(file, opts...)
- if err != nil {
- return fmt.Errorf("cannot load config file: %w", err)
- }
-
- for _, cc := range c {
- cm.configs[cc.Name] = *cc
- }
- return nil
-}
-
-func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
- cl.Lock()
- defer cl.Unlock()
- c, err := ReadBackendConfig(file, opts...)
- if err != nil {
- return fmt.Errorf("cannot read config file: %w", err)
- }
-
- cl.configs[c.Name] = *c
- return nil
-}
-
-func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
- cl.Lock()
- defer cl.Unlock()
- v, exists := cl.configs[m]
- return v, exists
-}
-
-func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
- cl.Lock()
- defer cl.Unlock()
- var res []BackendConfig
- for _, v := range cl.configs {
- res = append(res, v)
- }
-
- sort.SliceStable(res, func(i, j int) bool {
- return res[i].Name < res[j].Name
- })
-
- return res
-}
-
-func (cl *BackendConfigLoader) ListBackendConfigs() []string {
- cl.Lock()
- defer cl.Unlock()
- var res []string
- for k := range cl.configs {
- res = append(res, k)
- }
- return res
-}
-
-// Preload prepare models if they are not local but url or huggingface repositories
-func (cl *BackendConfigLoader) Preload(modelPath string) error {
- cl.Lock()
- defer cl.Unlock()
-
- status := func(fileName, current, total string, percent float64) {
- utils.DisplayDownloadFunction(fileName, current, total, percent)
- }
-
- log.Info().Msgf("Preloading models from %s", modelPath)
-
- renderMode := "dark"
- if os.Getenv("COLOR") != "" {
- renderMode = os.Getenv("COLOR")
- }
-
- glamText := func(t string) {
- out, err := glamour.Render(t, renderMode)
- if err == nil && os.Getenv("NO_COLOR") == "" {
- fmt.Println(out)
- } else {
- fmt.Println(t)
- }
- }
-
- for i, config := range cl.configs {
-
- // Download files and verify their SHA
- for _, file := range config.DownloadFiles {
- log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
-
- if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
- return err
- }
- // Create file path
- filePath := filepath.Join(modelPath, file.Filename)
-
- if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
- return err
- }
- }
-
- modelURL := config.PredictionOptions.Model
- modelURL = downloader.ConvertURL(modelURL)
-
- if downloader.LooksLikeURL(modelURL) {
- // md5 of model name
- md5Name := utils.MD5(modelURL)
-
- // check if file exists
- if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
- if err != nil {
- return err
- }
- }
-
- cc := cl.configs[i]
- c := &cc
- c.PredictionOptions.Model = md5Name
- cl.configs[i] = *c
- }
- if cl.configs[i].Name != "" {
- glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
- }
- if cl.configs[i].Description != "" {
- //glamText("**Description**")
- glamText(cl.configs[i].Description)
- }
- if cl.configs[i].Usage != "" {
- //glamText("**Usage**")
- glamText(cl.configs[i].Usage)
- }
- }
- return nil
-}
-
-// LoadBackendConfigsFromPath reads all the configurations of the models from a path
-// (non-recursive)
-func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
- cm.Lock()
- defer cm.Unlock()
- entries, err := os.ReadDir(path)
- if err != nil {
- return err
- }
- files := make([]fs.FileInfo, 0, len(entries))
- for _, entry := range entries {
- info, err := entry.Info()
- if err != nil {
- return err
- }
- files = append(files, info)
- }
- for _, file := range files {
- // Skip templates, YAML and .keep files
- if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
- continue
- }
- c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
- if err == nil {
- cm.configs[c.Name] = *c
- }
- }
-
- return nil
-}
diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go
new file mode 100644
index 00000000..62dfc1e0
--- /dev/null
+++ b/core/config/backend_config_loader.go
@@ -0,0 +1,509 @@
+package config
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+ "sync"
+
+ "github.com/charmbracelet/glamour"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/downloader"
+ "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
+ "gopkg.in/yaml.v2"
+)
+
+type BackendConfigLoader struct {
+ configs map[string]BackendConfig
+ sync.Mutex
+}
+
+type ConfigLoaderOptions struct {
+ debug bool
+ threads, ctxSize int
+ f16 bool
+}
+
+func LoadOptionDebug(debug bool) ConfigLoaderOption {
+ return func(o *ConfigLoaderOptions) {
+ o.debug = debug
+ }
+}
+
+func LoadOptionThreads(threads int) ConfigLoaderOption {
+ return func(o *ConfigLoaderOptions) {
+ o.threads = threads
+ }
+}
+
+func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
+ return func(o *ConfigLoaderOptions) {
+ o.ctxSize = ctxSize
+ }
+}
+
+func LoadOptionF16(f16 bool) ConfigLoaderOption {
+ return func(o *ConfigLoaderOptions) {
+ o.f16 = f16
+ }
+}
+
+type ConfigLoaderOption func(*ConfigLoaderOptions)
+
+func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) {
+ for _, l := range options {
+ l(lo)
+ }
+}
+
+func NewBackendConfigLoader() *BackendConfigLoader {
+ return &BackendConfigLoader{
+ configs: make(map[string]BackendConfig),
+ }
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+ c, err := readBackendConfig(file, opts...)
+ if err != nil {
+ return fmt.Errorf("cannot read config file: %w", err)
+ }
+
+ bcl.configs[c.Name] = *c
+ return nil
+}
+
+func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+ bcl.Lock()
+ defer bcl.Unlock()
+ v, exists := bcl.configs[m]
+ return v, exists
+}
+
+func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+ bcl.Lock()
+ defer bcl.Unlock()
+ var res []BackendConfig
+ for _, v := range bcl.configs {
+ res = append(res, v)
+ }
+ sort.SliceStable(res, func(i, j int) bool {
+ return res[i].Name < res[j].Name
+ })
+ return res
+}
+
+func (bcl *BackendConfigLoader) ListBackendConfigs() []string {
+ bcl.Lock()
+ defer bcl.Unlock()
+ var res []string
+ for k := range bcl.configs {
+ res = append(res, k)
+ }
+ return res
+}
+
+// Preload prepares models that are not local, e.g. URLs or Hugging Face repositories
+func (bcl *BackendConfigLoader) Preload(modelPath string) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+
+ status := func(fileName, current, total string, percent float64) {
+ utils.DisplayDownloadFunction(fileName, current, total, percent)
+ }
+
+ log.Info().Msgf("Preloading models from %s", modelPath)
+
+ renderMode := "dark"
+ if os.Getenv("COLOR") != "" {
+ renderMode = os.Getenv("COLOR")
+ }
+
+ glamText := func(t string) {
+ out, err := glamour.Render(t, renderMode)
+ if err == nil && os.Getenv("NO_COLOR") == "" {
+ fmt.Println(out)
+ } else {
+ fmt.Println(t)
+ }
+ }
+
+ for i, config := range bcl.configs {
+
+ // Download files and verify their SHA
+ for _, file := range config.DownloadFiles {
+ log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
+
+ if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
+ return err
+ }
+ // Create file path
+ filePath := filepath.Join(modelPath, file.Filename)
+
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+ return err
+ }
+ }
+
+ modelURL := config.PredictionOptions.Model
+ modelURL = downloader.ConvertURL(modelURL)
+
+ if downloader.LooksLikeURL(modelURL) {
+ // md5 of model name
+ md5Name := utils.MD5(modelURL)
+
+ // check if file exists
+ if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+ if err != nil {
+ return err
+ }
+ }
+
+ cc := bcl.configs[i]
+ c := &cc
+ c.PredictionOptions.Model = md5Name
+ bcl.configs[i] = *c
+ }
+ if bcl.configs[i].Name != "" {
+ glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name))
+ }
+ if bcl.configs[i].Description != "" {
+ //glamText("**Description**")
+ glamText(bcl.configs[i].Description)
+ }
+ if bcl.configs[i].Usage != "" {
+ //glamText("**Usage**")
+ glamText(bcl.configs[i].Usage)
+ }
+ }
+ return nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+ entries, err := os.ReadDir(path)
+ if err != nil {
+ return err
+ }
+ files := make([]fs.FileInfo, 0, len(entries))
+ for _, entry := range entries {
+ info, err := entry.Info()
+ if err != nil {
+ return err
+ }
+ files = append(files, info)
+ }
+ for _, file := range files {
+ // Skip templates, YAML and .keep files
+ if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
+ continue
+ }
+ c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...)
+ if err == nil {
+ bcl.configs[c.Name] = *c
+ }
+ }
+
+ return nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+ c, err := readBackendConfigFile(file, opts...)
+ if err != nil {
+ return fmt.Errorf("cannot load config file: %w", err)
+ }
+
+ for _, cc := range c {
+ bcl.configs[cc.Name] = *cc
+ }
+ return nil
+}
+
+//////////
+
+// Load a config file for a model
+func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+
+ // Start from a default config seeded with the model name
+ cfg := &BackendConfig{
+ PredictionOptions: schema.PredictionOptions{
+ Model: modelName,
+ },
+ }
+
+ cfgExisting, exists := bcl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ } else {
+ // Load a config file if present after the model name
+ modelConfig := filepath.Join(modelPath, modelName+".yaml")
+ if _, err := os.Stat(modelConfig); err == nil {
+ if err := bcl.LoadBackendConfig(modelConfig); err != nil {
+ return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+ }
+ cfgExisting, exists = bcl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ }
+ }
+ }
+
+ cfg.SetDefaults(opts...)
+ return cfg, nil
+}
+
+func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+ c := &[]*BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ for _, cc := range *c {
+ cc.SetDefaults(opts...)
+ }
+
+ return *c, nil
+}
+
+func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+ c := &BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ c.SetDefaults(opts...)
+ return c, nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) {
+ cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+ LoadOptionContextSize(appConfig.ContextSize),
+ LoadOptionDebug(appConfig.Debug),
+ LoadOptionF16(appConfig.F16),
+ LoadOptionThreads(appConfig.Threads),
+ )
+
+ // Set the parameters for the language model prediction
+ updateBackendConfigFromOpenAIRequest(cfg, input)
+
+ return cfg, input, err
+}
+
+func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) {
+ if request.Echo {
+ bc.Echo = request.Echo
+ }
+ if request.TopK != nil && *request.TopK != 0 {
+ bc.TopK = request.TopK
+ }
+ if request.TopP != nil && *request.TopP != 0 {
+ bc.TopP = request.TopP
+ }
+
+ if request.Backend != "" {
+ bc.Backend = request.Backend
+ }
+
+ if request.ClipSkip != 0 {
+ bc.Diffusers.ClipSkip = request.ClipSkip
+ }
+
+ if request.ModelBaseName != "" {
+ bc.AutoGPTQ.ModelBaseName = request.ModelBaseName
+ }
+
+ if request.NegativePromptScale != 0 {
+ bc.NegativePromptScale = request.NegativePromptScale
+ }
+
+ if request.UseFastTokenizer {
+ bc.UseFastTokenizer = request.UseFastTokenizer
+ }
+
+ if request.NegativePrompt != "" {
+ bc.NegativePrompt = request.NegativePrompt
+ }
+
+ if request.RopeFreqBase != 0 {
+ bc.RopeFreqBase = request.RopeFreqBase
+ }
+
+ if request.RopeFreqScale != 0 {
+ bc.RopeFreqScale = request.RopeFreqScale
+ }
+
+ if request.Grammar != "" {
+ bc.Grammar = request.Grammar
+ }
+
+ if request.Temperature != nil && *request.Temperature != 0 {
+ bc.Temperature = request.Temperature
+ }
+
+ if request.Maxtokens != nil && *request.Maxtokens != 0 {
+ bc.Maxtokens = request.Maxtokens
+ }
+
+ switch stop := request.Stop.(type) {
+ case string:
+ if stop != "" {
+ bc.StopWords = append(bc.StopWords, stop)
+ }
+ case []interface{}:
+ for _, pp := range stop {
+ if s, ok := pp.(string); ok {
+ bc.StopWords = append(bc.StopWords, s)
+ }
+ }
+ }
+
+ if len(request.Tools) > 0 {
+ for _, tool := range request.Tools {
+ request.Functions = append(request.Functions, tool.Function)
+ }
+ }
+
+ if request.ToolsChoice != nil {
+ var toolChoice grammar.Tool
+ switch content := request.ToolsChoice.(type) {
+ case string:
+ _ = json.Unmarshal([]byte(content), &toolChoice)
+ case map[string]interface{}:
+ dat, _ := json.Marshal(content)
+ _ = json.Unmarshal(dat, &toolChoice)
+ }
+ request.FunctionCall = map[string]interface{}{
+ "name": toolChoice.Function.Name,
+ }
+ }
+
+ // Decode each request's message content
+ index := 0
+ for i, m := range request.Messages {
+ switch content := m.Content.(type) {
+ case string:
+ request.Messages[i].StringContent = content
+ case []interface{}:
+ dat, _ := json.Marshal(content)
+ c := []schema.Content{}
+ json.Unmarshal(dat, &c)
+ for _, pp := range c {
+ if pp.Type == "text" {
+ request.Messages[i].StringContent = pp.Text
+ } else if pp.Type == "image_url" {
+ // Detect if pp.ImageURL is a URL; if so, download the image and encode it in base64:
+ base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
+ if err == nil {
+ request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
+ // set a placeholder for each image
+ request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent
+ index++
+ } else {
+ fmt.Print("Failed encoding image", err)
+ }
+ }
+ }
+ }
+ }
+
+ if request.RepeatPenalty != 0 {
+ bc.RepeatPenalty = request.RepeatPenalty
+ }
+
+ if request.FrequencyPenalty != 0 {
+ bc.FrequencyPenalty = request.FrequencyPenalty
+ }
+
+ if request.PresencePenalty != 0 {
+ bc.PresencePenalty = request.PresencePenalty
+ }
+
+ if request.Keep != 0 {
+ bc.Keep = request.Keep
+ }
+
+ if request.Batch != 0 {
+ bc.Batch = request.Batch
+ }
+
+ if request.IgnoreEOS {
+ bc.IgnoreEOS = request.IgnoreEOS
+ }
+
+ if request.Seed != nil {
+ bc.Seed = request.Seed
+ }
+
+ if request.TypicalP != nil {
+ bc.TypicalP = request.TypicalP
+ }
+
+ switch inputs := request.Input.(type) {
+ case string:
+ if inputs != "" {
+ bc.InputStrings = append(bc.InputStrings, inputs)
+ }
+ case []interface{}:
+ for _, pp := range inputs {
+ switch i := pp.(type) {
+ case string:
+ bc.InputStrings = append(bc.InputStrings, i)
+ case []interface{}:
+ tokens := []int{}
+ for _, ii := range i {
+ tokens = append(tokens, int(ii.(float64)))
+ }
+ bc.InputToken = append(bc.InputToken, tokens)
+ }
+ }
+ }
+
+ // Can be either a string or an object
+ switch fnc := request.FunctionCall.(type) {
+ case string:
+ if fnc != "" {
+ bc.SetFunctionCallString(fnc)
+ }
+ case map[string]interface{}:
+ var name string
+ n, exists := fnc["name"]
+ if exists {
+ nn, e := n.(string)
+ if e {
+ name = nn
+ }
+ }
+ bc.SetFunctionCallNameString(name)
+ }
+
+ switch p := request.Prompt.(type) {
+ case string:
+ bc.PromptStrings = append(bc.PromptStrings, p)
+ case []interface{}:
+ for _, pp := range p {
+ if s, ok := pp.(string); ok {
+ bc.PromptStrings = append(bc.PromptStrings, s)
+ }
+ }
+ }
+}
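
Reviewer note: a minimal sketch (not part of the patch) of how the relocated loader is meant to be driven end to end. The ./models path and option values are illustrative; every identifier comes from the file added above.

package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/core/config"
)

func main() {
	bcl := config.NewBackendConfigLoader()
	// Parse every *.yaml / *.yml file in the directory, applying defaults
	// through the functional LoadOption* options.
	if err := bcl.LoadBackendConfigsFromPath("./models",
		config.LoadOptionDebug(true),
		config.LoadOptionThreads(4),
		config.LoadOptionContextSize(2048),
		config.LoadOptionF16(false),
	); err != nil {
		panic(err)
	}
	for _, name := range bcl.ListBackendConfigs() {
		fmt.Println("loaded config:", name)
	}
}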
diff --git a/core/config/exports_test.go b/core/config/exports_test.go
new file mode 100644
index 00000000..70ba84e6
--- /dev/null
+++ b/core/config/exports_test.go
@@ -0,0 +1,6 @@
+package config
+
+// This file re-exports private functions to be used directly in unit tests.
+// Since this file's name ends in _test.go, these are only compiled during tests and never ship in regular builds.
+
+var ReadBackendConfigFile = readBackendConfigFile
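
This is the standard export_test.go idiom: the alias only exists under go test, so the private reader stays private everywhere else. Hypothetically, an external test in the same directory could exercise it like this (the testdata path is made up):

package config_test

import (
	"testing"

	"github.com/go-skynet/LocalAI/core/config"
)

func TestReadBackendConfigFile(t *testing.T) {
	// ReadBackendConfigFile is the test-only alias of readBackendConfigFile.
	cfgs, err := config.ReadBackendConfigFile("testdata/models.yaml")
	if err != nil {
		t.Fatal(err)
	}
	if len(cfgs) == 0 {
		t.Fatal("expected at least one backend config")
	}
}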
diff --git a/core/http/api.go b/core/http/api.go
index af38512a..5c9095ea 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -1,23 +1,20 @@
package http
import (
- "encoding/json"
"errors"
- "os"
"strings"
- "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/go-skynet/LocalAI/core"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/gofiber/swagger" // swagger handler
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
-
- "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
@@ -55,13 +52,12 @@ func readAuthHeader(c *fiber.Ctx) string {
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization
-
-func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
+func App(application *core.Application) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
Views: renderEngine(),
- BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
- DisableStartupMessage: appConfig.DisableMessage,
+ BodyLimit: application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+ DisableStartupMessage: application.ApplicationConfig.DisableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -82,7 +78,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
},
})
- if appConfig.Debug {
+ if application.ApplicationConfig.Debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
@@ -90,7 +86,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Default middleware config
- if !appConfig.Debug {
+ if !application.ApplicationConfig.Debug {
app.Use(recover.New())
}
@@ -108,27 +104,27 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
- if len(appConfig.ApiKeys) == 0 {
+ if len(application.ApplicationConfig.ApiKeys) == 0 {
return c.Next()
}
- // Check for api_keys.json file
- fileContent, err := os.ReadFile("api_keys.json")
- if err == nil {
- // Parse JSON content from the file
- var fileKeys []string
- err := json.Unmarshal(fileContent, &fileKeys)
- if err != nil {
- return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
- }
+ // // Check for api_keys.json file
+ // fileContent, err := os.ReadFile("api_keys.json")
+ // if err == nil {
+ // // Parse JSON content from the file
+ // var fileKeys []string
+ // err := json.Unmarshal(fileContent, &fileKeys)
+ // if err != nil {
+ // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
+ // }
- // Add file keys to options.ApiKeys
- appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
- }
+ // // Add file keys to options.ApiKeys
+ // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...)
+ // }
- if len(appConfig.ApiKeys) == 0 {
- return c.Next()
- }
+ // if len(application.ApplicationConfig.ApiKeys) == 0 {
+ // return c.Next()
+ // }
authHeader := readAuthHeader(c)
if authHeader == "" {
@@ -142,7 +138,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}
apiKey := authHeaderParts[1]
- for _, key := range appConfig.ApiKeys {
+ for _, key := range application.ApplicationConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
@@ -151,20 +147,22 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
- if appConfig.CORS {
+ if application.ApplicationConfig.CORS {
var c func(ctx *fiber.Ctx) error
- if appConfig.CORSAllowOrigins == "" {
+ if application.ApplicationConfig.CORSAllowOrigins == "" {
c = cors.New()
} else {
- c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
+ c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins})
}
app.Use(c)
}
+ fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig)
+
// LocalAI API endpoints
- galleryService := services.NewGalleryService(appConfig.ModelPath)
- galleryService.Start(appConfig.Context, cl)
+ galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath)
+ galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader)
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
@@ -172,29 +170,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}{Version: internal.PrintableVersion()})
})
- // Make sure directories exists
- os.MkdirAll(appConfig.ImageDir, 0755)
- os.MkdirAll(appConfig.AudioDir, 0755)
- os.MkdirAll(appConfig.UploadDir, 0755)
- os.MkdirAll(appConfig.ConfigsDir, 0755)
- os.MkdirAll(appConfig.ModelPath, 0755)
-
- // Load config jsons
- utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
- utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
- utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
-
app.Get("/swagger/*", swagger.HandlerDefault) // default
welcomeRoute(
app,
- cl,
- ml,
- appConfig,
+ application.BackendConfigLoader,
+ application.ModelLoader,
+ application.ApplicationConfig,
auth,
)
- modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
+ modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
@@ -203,83 +189,85 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
- app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
-
- // Elevenlabs
- app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
-
// Stores
- sl := model.NewModelLoader("")
- app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
- app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
- app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
- app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
+ storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now.
+ app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig))
+ app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig))
+ app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig))
+ app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig))
- // openAI compatible API endpoint
+ // openAI compatible API endpoints
// chat
- app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
- app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+ app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService))
+ app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService))
// edit
- app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
- app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+ app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService))
+ app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService))
// assistant
- app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ // TODO: Refactor this to the new style eventually
+ app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
// files
- app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
- app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
- app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
- app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
- app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
- app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
- app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
- app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
- app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
- app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+ app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
+ app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
// completion
- app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
- app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
- app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+ app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
+ app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
+ app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
// embeddings
- app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
- app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
- app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
+ app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
+ app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
// audio
- app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
- app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
+ app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService))
+ app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
// images
- app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
+ app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService))
- if appConfig.ImageDir != "" {
- app.Static("/generated-images", appConfig.ImageDir)
+ // Elevenlabs
+ app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+
+ // LocalAI TTS?
+ app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+
+ if application.ApplicationConfig.ImageDir != "" {
+ app.Static("/generated-images", application.ApplicationConfig.ImageDir)
}
- if appConfig.AudioDir != "" {
- app.Static("/generated-audio", appConfig.AudioDir)
+ if application.ApplicationConfig.AudioDir != "" {
+ app.Static("/generated-audio", application.ApplicationConfig.AudioDir)
}
ok := func(c *fiber.Ctx) error {
@@ -291,13 +279,12 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Get("/readyz", ok)
// Experimental Backend Statistics Module
- backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
- app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
- app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
+ app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService))
+ app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService))
// models
- app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
- app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService))
+ app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService))
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
diff --git a/core/http/api_test.go b/core/http/api_test.go
index 1553ed21..bf8feb1c 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -12,7 +12,9 @@ import (
"os"
"path/filepath"
"runtime"
+ "strings"
+ "github.com/go-skynet/LocalAI/core"
"github.com/go-skynet/LocalAI/core/config"
. "github.com/go-skynet/LocalAI/core/http"
"github.com/go-skynet/LocalAI/core/schema"
@@ -205,9 +207,7 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
- var bcl *config.BackendConfigLoader
- var ml *model.ModelLoader
- var applicationConfig *config.ApplicationConfig
+ var application *core.Application
commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -252,7 +252,7 @@ var _ = Describe("API test", func() {
},
}
- bcl, ml, applicationConfig, err = startup.Startup(
+ application, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -261,7 +261,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())
- app, err = App(bcl, ml, applicationConfig)
+ app, err = App(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -474,11 +474,11 @@ var _ = Describe("API test", func() {
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
- Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
- Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
+ Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil())
+ Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name)
var res map[string]string
- err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
+ err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
@@ -487,9 +487,9 @@ var _ = Describe("API test", func() {
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
- if runtime.GOOS != "linux" {
- Skip("test supported only on linux")
- }
+ // if runtime.GOOS != "linux" {
+ // Skip("test supported only on linux")
+ // }
modelName := "codellama"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
@@ -504,7 +504,7 @@ var _ = Describe("API test", func() {
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
- }, "360s", "10s").Should(Equal(true))
+ }, "480s", "10s").Should(Equal(true))
By("testing chat")
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
@@ -551,11 +551,13 @@ var _ = Describe("API test", func() {
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
- Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
- Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
+ fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message)
+ Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil())
+ Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil())
+ Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name)
var res map[string]string
- err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
+ err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
@@ -609,7 +611,7 @@ var _ = Describe("API test", func() {
},
}
- bcl, ml, applicationConfig, err = startup.Startup(
+ application, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithAudioDir(tmpdir),
@@ -620,7 +622,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
- app, err = App(bcl, ml, applicationConfig)
+ app, err = App(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -724,14 +726,14 @@ var _ = Describe("API test", func() {
var err error
- bcl, ml, applicationConfig, err = startup.Startup(
+ application, err = startup.Startup(
append(commonOpts,
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
- app, err = App(bcl, ml, applicationConfig)
+ app, err = App(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -761,6 +763,11 @@ var _ = Describe("API test", func() {
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -768,6 +775,11 @@ var _ = Describe("API test", func() {
})
It("can generate chat completions via ggml", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -775,6 +787,11 @@ var _ = Describe("API test", func() {
})
It("can generate completions from model configs", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -782,6 +799,11 @@ var _ = Describe("API test", func() {
})
It("can generate chat completions from model configs", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -868,9 +890,9 @@ var _ = Describe("API test", func() {
Context("backends", func() {
It("runs rwkv completion", func() {
- if runtime.GOOS != "linux" {
- Skip("test supported only on linux")
- }
+ // if runtime.GOOS != "linux" {
+ // Skip("test supported only on linux")
+ // }
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
@@ -891,17 +913,20 @@ var _ = Describe("API test", func() {
}
Expect(err).ToNot(HaveOccurred())
- text += response.Choices[0].Text
- tokens++
+
+ if len(response.Choices) > 0 {
+ text += response.Choices[0].Text
+ tokens++
+ }
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(ContainSubstring("five"))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
It("runs rwkv chat completion", func() {
- if runtime.GOOS != "linux" {
- Skip("test supported only on linux")
- }
+ // if runtime.GOOS != "linux" {
+ // Skip("test supported only on linux")
+ // }
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
@@ -1010,14 +1035,14 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())
var err error
- bcl, ml, applicationConfig, err = startup.Startup(
+ application, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
- app, err = App(bcl, ml, applicationConfig)
+ app, err = App(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -1041,18 +1066,33 @@ var _ = Describe("API test", func() {
}
})
It("can generate chat completions from config file (list1)", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file (list2)", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
+ bt, ok := os.LookupEnv("BUILD_TYPE")
+ if ok && strings.ToLower(bt) == "metal" {
+ Skip("GGML + Metal is known flaky, skip test temporarily")
+ }
+
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go
index ffb63111..99fbcde9 100644
--- a/core/http/ctx/fiber.go
+++ b/core/http/ctx/fiber.go
@@ -1,43 +1,88 @@
package fiberContext
import (
+ "context"
+ "encoding/json"
"fmt"
"strings"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
+type FiberContextExtractor struct {
+ ml *model.ModelLoader
+ appConfig *config.ApplicationConfig
+}
+
+func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor {
+ return &FiberContextExtractor{
+ ml: ml,
+ appConfig: appConfig,
+ }
+}
+
// ModelFromContext returns the model from the context
// If no model is specified, it will take the first available
// Takes a model string as input which should be the one received from the user request.
// It returns the model name resolved from the context and an error if any.
-func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
- if ctx.Params("model") != "" {
- modelInput = ctx.Params("model")
+func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) {
+ ctxPM := ctx.Params("model")
+ if ctxPM != "" {
+ log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM)
+ modelInput = ctxPM
}
// Set model from bearer token, if available
- bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
- bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+ bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ")
+ bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
- models, _ := loader.ListModels()
+ models, _ := fce.ml.ListModels()
if len(models) > 0 {
modelInput = models[0]
- log.Debug().Msgf("No model specified, using: %s", modelInput)
+ log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput)
} else {
- log.Debug().Msgf("No model specified, returning error")
- return "", fmt.Errorf("no model specified")
+ log.Warn().Msgf("[FCE] No model specified, none available")
+ return "", fmt.Errorf("[fce] no model specified, none available")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
- log.Debug().Msgf("Using model from bearer token: %s", bearer)
+ log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer)
modelInput = bearer
}
+
+ if modelInput == "" {
+ log.Warn().Msg("[FCE] modelInput is empty")
+ }
return modelInput, nil
}
+
+// TODO: Do we still need the first return value?
+func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) {
+ input := new(schema.OpenAIRequest)
+
+ // Get input data from the request body
+ if err := c.BodyParser(input); err != nil {
+ return "", nil, fmt.Errorf("failed parsing request body: %w", err)
+ }
+
+ received, _ := json.Marshal(input)
+
+ ctx, cancel := context.WithCancel(fce.appConfig.Context)
+ input.Context = ctx
+ input.Cancel = cancel
+
+ log.Debug().Msgf("Request received: %s", string(received))
+
+ var err error
+ input.Model, err = fce.ModelFromContext(c, input.Model, firstModel)
+
+ return input.Model, input, err
+}
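
For context, a hypothetical route handler using the new extractor; the flow mirrors the endpoints refactored below: parse the body into an OpenAIRequest, resolve the model (optionally falling back to the first available one), then hand the request off to a backend service.

package example

import (
	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
	"github.com/gofiber/fiber/v2"
)

func exampleHandler(fce *fiberContext.FiberContextExtractor) fiber.Handler {
	return func(c *fiber.Ctx) error {
		// Parses the body, wires request.Context/request.Cancel to the app
		// context, and resolves the model name (firstModel fallback enabled).
		modelName, request, err := fce.OpenAIRequestFromContext(c, true)
		if err != nil {
			return err
		}
		_ = request // a real endpoint would pass this to a backend service
		return c.SendString("resolved model: " + modelName)
	}
}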
diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go
index 841f9b5f..4f5db463 100644
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -2,9 +2,7 @@ package elevenlabs
import (
"github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
- "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
@@ -17,7 +15,7 @@ import (
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/text-to-speech/{voice-id} [post]
-func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsTTSRequest)
@@ -28,34 +26,21 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
return err
}
- modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false)
+ var err error
+ input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false)
if err != nil {
- modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
}
- cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
- config.LoadOptionDebug(appConfig.Debug),
- config.LoadOptionThreads(appConfig.Threads),
- config.LoadOptionContextSize(appConfig.ContextSize),
- config.LoadOptionF16(appConfig.F16),
- )
- if err != nil {
- modelFile = input.ModelID
- log.Warn().Msgf("Model not found in context: %s", input.ModelID)
- } else {
- if input.ModelID != "" {
- modelFile = input.ModelID
- } else {
- modelFile = cfg.Model
- }
+ responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{
+ Model: input.ModelID,
+ Voice: voiceID,
+ Input: input.Text,
+ })
+ rawValue := <-responseChannel
+ if rawValue.Error != nil {
+ return rawValue.Error
}
- log.Debug().Msgf("Request for model: %s", modelFile)
-
- filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg)
- if err != nil {
- return err
- }
- return c.Download(filePath)
+ return c.Download(*rawValue.Value)
}
}
diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go
index 8c7a664a..dac20388 100644
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -6,7 +6,7 @@ import (
"github.com/gofiber/fiber/v2"
)
-func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
+func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest)
@@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error
}
}
-func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
+func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest)
// Get input data from the request body
diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go
index 7822e024..df7841fb 100644
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -2,9 +2,7 @@ package localai
import (
"github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
- "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
@@ -16,45 +14,26 @@ import (
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/audio/speech [post]
-func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
-
+ var err error
input := new(schema.TTSRequest)
// Get input data from the request body
- if err := c.BodyParser(input); err != nil {
+ if err = c.BodyParser(input); err != nil {
return err
}
- modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
+ input.Model, err = fce.ModelFromContext(c, input.Model, false)
if err != nil {
- modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
- cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
- config.LoadOptionDebug(appConfig.Debug),
- config.LoadOptionThreads(appConfig.Threads),
- config.LoadOptionContextSize(appConfig.ContextSize),
- config.LoadOptionF16(appConfig.F16),
- )
-
- if err != nil {
- modelFile = input.Model
- log.Warn().Msgf("Model not found in context: %s", input.Model)
- } else {
- modelFile = cfg.Model
+ responseChannel := ttsbs.TextToAudioFile(input)
+ rawValue := <-responseChannel
+ if rawValue.Error != nil {
+ return rawValue.Error
}
- log.Debug().Msgf("Request for model: %s", modelFile)
-
- if input.Backend != "" {
- cfg.Backend = input.Backend
- }
-
- filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg)
- if err != nil {
- return err
- }
- return c.Download(filePath)
+ return c.Download(*rawValue.Value)
}
}
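
Both TTS endpoints now follow the same channel-based service shape: send one request, block on one reply, and surface either the error or the produced file path. A condensed sketch of that contract; the helper and model name are illustrative, the types come from the diffs above:

package example

import (
	"github.com/go-skynet/LocalAI/core/backend"
	"github.com/go-skynet/LocalAI/core/schema"
)

// synthesize is a hypothetical helper showing the request/reply contract.
func synthesize(ttsbs *backend.TextToSpeechBackendService, text string) (string, error) {
	responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{
		Model: "any-tts-model", // illustrative
		Input: text,
	})
	rawValue := <-responseChannel
	if rawValue.Error != nil {
		return "", rawValue.Error
	}
	// Value holds the path of the generated audio file.
	return *rawValue.Value, nil
}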
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index dceb3789..72cb8b4a 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
}
}
- return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find "))
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID))
}
}
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 36d1142b..a240b024 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -5,17 +5,11 @@ import (
"bytes"
"encoding/json"
"fmt"
- "strings"
- "time"
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
- model "github.com/go-skynet/LocalAI/pkg/model"
- "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/go-skynet/LocalAI/core/services"
"github.com/gofiber/fiber/v2"
- "github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -25,412 +19,82 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
-func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
- emptyMessage := ""
- id := uuid.New().String()
- created := int(time.Now().Unix())
-
- process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
- initialMessage := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
- Object: "chat.completion.chunk",
- }
- responses <- initialMessage
-
- ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
- resp := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
- Object: "chat.completion.chunk",
- Usage: schema.OpenAIUsage{
- PromptTokens: usage.Prompt,
- CompletionTokens: usage.Completion,
- TotalTokens: usage.Prompt + usage.Completion,
- },
- }
-
- responses <- resp
- return true
- })
- close(responses)
- }
- processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
- result := ""
- _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
- result += s
- // TODO: Change generated BNF grammar to be compliant with the schema so we can
- // stream the result token by token here.
- return true
- })
-
- results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
- noActionToRun := len(results) > 0 && results[0].name == noAction
-
- switch {
- case noActionToRun:
- initialMessage := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
- Object: "chat.completion.chunk",
- }
- responses <- initialMessage
-
- result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
- if err != nil {
- log.Error().Err(err).Msg("error handling question")
- return
- }
-
- resp := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
- Object: "chat.completion.chunk",
- Usage: schema.OpenAIUsage{
- PromptTokens: tokenUsage.Prompt,
- CompletionTokens: tokenUsage.Completion,
- TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
- },
- }
-
- responses <- resp
-
- default:
- for i, ss := range results {
- name, args := ss.name, ss.arguments
-
- initialMessage := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{
- Delta: &schema.Message{
- Role: "assistant",
- ToolCalls: []schema.ToolCall{
- {
- Index: i,
- ID: id,
- Type: "function",
- FunctionCall: schema.FunctionCall{
- Name: name,
- },
- },
- },
- }}},
- Object: "chat.completion.chunk",
- }
- responses <- initialMessage
-
- responses <- schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{
- Delta: &schema.Message{
- Role: "assistant",
- ToolCalls: []schema.ToolCall{
- {
- Index: i,
- ID: id,
- Type: "function",
- FunctionCall: schema.FunctionCall{
- Arguments: args,
- },
- },
- },
- }}},
- Object: "chat.completion.chunk",
- }
- }
- }
-
- close(responses)
- }
-
+func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- processFunctions := false
- funcs := grammar.Functions{}
- modelFile, input, err := readRequest(c, ml, startupOptions, true)
+ _, request, err := fce.OpenAIRequestFromContext(c, false)
if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
+ return fmt.Errorf("failed reading parameters from request: %w", err)
}
- config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
+ traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream)
if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
- }
- log.Debug().Msgf("Configuration read: %+v", config)
-
- // Allow the user to set custom actions via config file
- // to be "embedded" in each model
- noActionName := "answer"
- noActionDescription := "use this action to answer without performing any action"
-
- if config.FunctionsConfig.NoActionFunctionName != "" {
- noActionName = config.FunctionsConfig.NoActionFunctionName
- }
- if config.FunctionsConfig.NoActionDescriptionName != "" {
- noActionDescription = config.FunctionsConfig.NoActionDescriptionName
+ return err
}
- if input.ResponseFormat.Type == "json_object" {
- input.Grammar = grammar.JSONBNF
- }
+ if request.Stream {
- config.Grammar = input.Grammar
+ log.Debug().Msgf("Chat Stream request received")
- // process functions if we have any defined or if we have a function call string
- if len(input.Functions) > 0 && config.ShouldUseFunctions() {
- log.Debug().Msgf("Response needs to process functions")
-
- processFunctions = true
-
- noActionGrammar := grammar.Function{
- Name: noActionName,
- Description: noActionDescription,
- Parameters: map[string]interface{}{
- "properties": map[string]interface{}{
- "message": map[string]interface{}{
- "type": "string",
- "description": "The message to reply the user with",
- }},
- },
- }
-
- // Append the no action function
- funcs = append(funcs, input.Functions...)
- if !config.FunctionsConfig.DisableNoAction {
- funcs = append(funcs, noActionGrammar)
- }
-
- // Force picking one of the functions by the request
- if config.FunctionToCall() != "" {
- funcs = funcs.Select(config.FunctionToCall())
- }
-
- // Update input grammar
- jsStruct := funcs.ToJSONStructure()
- config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
- } else if input.JSONFunctionGrammarObject != nil {
- config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
- }
-
- // functions are not supported in stream mode (yet?)
- toStream := input.Stream
-
- log.Debug().Msgf("Parameters: %+v", config)
-
- var predInput string
-
- // If we are using the tokenizer template, we don't need to process the messages
- // unless we are processing functions
- if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
-
- suppressConfigSystemPrompt := false
- mess := []string{}
- for messageIndex, i := range input.Messages {
- var content string
- role := i.Role
-
- // if function call, we might want to customize the role so we can display better that the "assistant called a json action"
- // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
- if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
- roleFn := "assistant_function_call"
- r := config.Roles[roleFn]
- if r != "" {
- role = roleFn
- }
- }
- r := config.Roles[role]
- contentExists := i.Content != nil && i.StringContent != ""
-
- fcall := i.FunctionCall
- if len(i.ToolCalls) > 0 {
- fcall = i.ToolCalls
- }
-
- // First attempt to populate content via a chat message specific template
- if config.TemplateConfig.ChatMessage != "" {
- chatMessageData := model.ChatMessageTemplateData{
- SystemPrompt: config.SystemPrompt,
- Role: r,
- RoleName: role,
- Content: i.StringContent,
- FunctionCall: fcall,
- FunctionName: i.Name,
- LastMessage: messageIndex == (len(input.Messages) - 1),
- Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
- MessageIndex: messageIndex,
- }
- templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
- if err != nil {
- log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
- } else {
- if templatedChatMessage == "" {
- log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
- continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
- }
- log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
- content = templatedChatMessage
- }
- }
-
- marshalAnyRole := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + fmt.Sprint(r, " ", string(j))
- } else {
- content = fmt.Sprint(r, " ", string(j))
- }
- }
- }
- marshalAny := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + string(j)
- } else {
- content = string(j)
- }
- }
- }
- // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
- if content == "" {
- if r != "" {
- if contentExists {
- content = fmt.Sprint(r, i.StringContent)
- }
-
- if i.FunctionCall != nil {
- marshalAnyRole(i.FunctionCall)
- }
- if i.ToolCalls != nil {
- marshalAnyRole(i.ToolCalls)
- }
- } else {
- if contentExists {
- content = fmt.Sprint(i.StringContent)
- }
- if i.FunctionCall != nil {
- marshalAny(i.FunctionCall)
- }
- if i.ToolCalls != nil {
- marshalAny(i.ToolCalls)
- }
- }
- // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
- if contentExists && role == "system" {
- suppressConfigSystemPrompt = true
- }
- }
-
- mess = append(mess, content)
- }
-
- predInput = strings.Join(mess, "\n")
- log.Debug().Msgf("Prompt (before templating): %s", predInput)
-
- templateFile := ""
-
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
- templateFile = config.Model
- }
-
- if config.TemplateConfig.Chat != "" && !processFunctions {
- templateFile = config.TemplateConfig.Chat
- }
-
- if config.TemplateConfig.Functions != "" && processFunctions {
- templateFile = config.TemplateConfig.Functions
- }
-
- if templateFile != "" {
- templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
- SystemPrompt: config.SystemPrompt,
- SuppressSystemPrompt: suppressConfigSystemPrompt,
- Input: predInput,
- Functions: funcs,
- })
- if err == nil {
- predInput = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", predInput)
- } else {
- log.Debug().Msgf("Template failed loading: %s", err.Error())
- }
- }
-
- log.Debug().Msgf("Prompt (after templating): %s", predInput)
- if processFunctions {
- log.Debug().Msgf("Grammar: %+v", config.Grammar)
- }
- }
-
- switch {
- case toStream:
-
- log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
- // c.Set("Content-Type", "text/event-stream")
+ //
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
- responses := make(chan schema.OpenAIResponse)
-
- if !processFunctions {
- go process(predInput, input, config, ml, responses)
- } else {
- go processTools(noActionName, predInput, input, config, ml, responses)
- }
-
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
usage := &schema.OpenAIUsage{}
toolsCalled := false
- for ev := range responses {
- usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
- if len(ev.Choices[0].Delta.ToolCalls) > 0 {
+ for ev := range tokenChannel {
+ if ev.Error != nil {
+ log.Debug().Err(ev.Error).Msg("chat streaming tokenChannel error")
+ request.Cancel()
+ break
+ }
+ usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
+
+ if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
- enc.Encode(ev)
- log.Debug().Msgf("Sending chunk: %s", buf.String())
+ if ev.Error != nil {
+ log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error chunk received on tokenChannel")
+ enc.Encode(ev.Error)
+ } else {
+ enc.Encode(ev.Value)
+ }
+ log.Debug().Msgf("chat streaming sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
if err != nil {
- log.Debug().Msgf("Sending chunk failed: %v", err)
- input.Cancel()
+ log.Debug().Err(err).Msg("Sending chunk failed")
+ request.Cancel()
+ break
+ }
+ err = w.Flush()
+ if err != nil {
+ log.Debug().Msg("error while flushing, closing connection")
+ request.Cancel()
break
}
- w.Flush()
}
finishReason := "stop"
if toolsCalled {
finishReason = "tool_calls"
- } else if toolsCalled && len(input.Tools) == 0 {
+ } else if toolsCalled && len(request.Tools) == 0 {
finishReason = "function_call"
}
resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: finishReason,
Index: 0,
- Delta: &schema.Message{Content: &emptyMessage},
+ Delta: &schema.Message{Content: ""},
}},
Object: "chat.completion.chunk",
Usage: *usage,
@@ -441,202 +105,21 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
+
return nil
-
- // no streaming mode
- default:
- result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
- if !processFunctions {
- // no function is called, just reply and use stop as finish reason
- *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
- return
- }
-
- results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
- noActionsToRun := len(results) > 0 && results[0].name == noActionName
-
- switch {
- case noActionsToRun:
- result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
- if err != nil {
- log.Error().Err(err).Msg("error handling question")
- return
- }
- *c = append(*c, schema.Choice{
- Message: &schema.Message{Role: "assistant", Content: &result}})
- default:
- toolChoice := schema.Choice{
- Message: &schema.Message{
- Role: "assistant",
- },
- }
-
- if len(input.Tools) > 0 {
- toolChoice.FinishReason = "tool_calls"
- }
-
- for _, ss := range results {
- name, args := ss.name, ss.arguments
- if len(input.Tools) > 0 {
- // If we are using tools, we condense the function calls into
- // a single response choice with all the tools
- toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
- schema.ToolCall{
- ID: id,
- Type: "function",
- FunctionCall: schema.FunctionCall{
- Name: name,
- Arguments: args,
- },
- },
- )
- } else {
- // otherwise we return more choices directly
- *c = append(*c, schema.Choice{
- FinishReason: "function_call",
- Message: &schema.Message{
- Role: "assistant",
- FunctionCall: map[string]interface{}{
- "name": name,
- "arguments": args,
- },
- },
- })
- }
- }
-
- if len(input.Tools) > 0 {
- // we need to append our result if we are using tools
- *c = append(*c, toolChoice)
- }
- }
-
- }, nil)
- if err != nil {
- return err
- }
-
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: result,
- Object: "chat.completion",
- Usage: schema.OpenAIUsage{
- PromptTokens: tokenUsage.Prompt,
- CompletionTokens: tokenUsage.Completion,
- TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
- },
- }
- respData, _ := json.Marshal(resp)
- log.Debug().Msgf("Response: %s", respData)
-
- // Return the prediction in the response body
- return c.JSON(resp)
}
+ // TODO: is it correct for this branch to be mutually exclusive with Stream, or do we need to issue both responses?
+ rawResponse := <-finalResultChannel
+
+ if rawResponse.Error != nil {
+ return rawResponse.Error
+ }
+
+ jsonResult, _ := json.Marshal(rawResponse.Value)
+ log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response")
+
+ // Return the prediction in the response body
+ return c.JSON(rawResponse.Value)
}
}
-
-func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
- log.Debug().Msgf("nothing to do, computing a reply")
-
- // If there is a message that the LLM already sends as part of the JSON reply, use it
- arguments := map[string]interface{}{}
- json.Unmarshal([]byte(args), &arguments)
- m, exists := arguments["message"]
- if exists {
- switch message := m.(type) {
- case string:
- if message != "" {
- log.Debug().Msgf("Reply received from LLM: %s", message)
- message = backend.Finetune(*config, prompt, message)
- log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
-
- return message, nil
- }
- }
- }
-
- log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
- // Otherwise ask the LLM to understand the JSON output and the context, and return a message
- // Note: This costs (in term of CPU/GPU) another computation
- config.Grammar = ""
- images := []string{}
- for _, m := range input.Messages {
- images = append(images, m.StringImages...)
- }
-
- predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
- if err != nil {
- log.Error().Err(err).Msg("model inference failed")
- return "", err
- }
-
- prediction, err := predFunc()
- if err != nil {
- log.Error().Err(err).Msg("prediction failed")
- return "", err
- }
- return backend.Finetune(*config, prompt, prediction.Response), nil
-}
-
-type funcCallResults struct {
- name string
- arguments string
-}
-
-func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
- results := []funcCallResults{}
-
- // TODO: use generics to avoid this code duplication
- if multipleResults {
- ss := []map[string]interface{}{}
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
- log.Debug().Msgf("Function return: %s %+v", s, ss)
-
- for _, s := range ss {
- func_name, ok := s["function"]
- if !ok {
- continue
- }
- args, ok := s["arguments"]
- if !ok {
- continue
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- continue
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
- } else {
- // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
- ss := map[string]interface{}{}
- // This prevent newlines to break JSON parsing for clients
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
- log.Debug().Msgf("Function return: %s %+v", s, ss)
-
- // The grammar defines the function name as "function", while OpenAI returns "name"
- func_name, ok := ss["function"]
- if !ok {
- return results
- }
- // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
- args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
- if !ok {
- return results
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- return results
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
-
- return results
-}
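
With the business logic moved into services, the streaming branch of ChatEndpoint reduces to draining a token channel and writing SSE frames. A condensed, self-contained sketch of that loop; Chunk and the cancel callback are illustrative stand-ins for the real schema and request types:

package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"os"
)

// Chunk is a hypothetical stand-in for the per-token result wrapper.
type Chunk struct {
	Value string
	Error error
}

// streamSSE mirrors the new loop: JSON-encode each chunk, frame it as an
// SSE "data:" event, flush, and cancel upstream work on any failure.
func streamSSE(w *bufio.Writer, tokens <-chan Chunk, cancel func()) {
	for ev := range tokens {
		if ev.Error != nil {
			cancel()
			break
		}
		var buf bytes.Buffer
		json.NewEncoder(&buf).Encode(ev.Value)
		if _, err := fmt.Fprintf(w, "data: %v\n", buf.String()); err != nil {
			cancel()
			break
		}
		if err := w.Flush(); err != nil {
			cancel()
			break
		}
	}
	w.WriteString("data: [DONE]\n\n")
	w.Flush()
}

func main() {
	ch := make(chan Chunk, 2)
	ch <- Chunk{Value: "hel"}
	ch <- Chunk{Value: "lo"}
	close(ch)
	streamSSE(bufio.NewWriter(os.Stdout), ch, func() {})
}

Note that the rewritten loop also checks the error from w.Flush(), which the old code ignored; a failed flush is how a dropped client connection gets detected.
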
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index 69923475..d8b412a9 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -4,18 +4,13 @@ import (
"bufio"
"bytes"
"encoding/json"
- "errors"
"fmt"
- "time"
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
- model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
- "github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -25,116 +20,50 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]
-func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
- id := uuid.New().String()
- created := int(time.Now().Unix())
-
- process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
- ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
- resp := schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{
- {
- Index: 0,
- Text: s,
- },
- },
- Object: "text_completion",
- Usage: schema.OpenAIUsage{
- PromptTokens: usage.Prompt,
- CompletionTokens: usage.Completion,
- TotalTokens: usage.Prompt + usage.Completion,
- },
- }
- log.Debug().Msgf("Sending goroutine: %s", s)
-
- responses <- resp
- return true
- })
- close(responses)
- }
-
+func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- modelFile, input, err := readRequest(c, ml, appConfig, true)
+ _, request, err := fce.OpenAIRequestFromContext(c, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- log.Debug().Msgf("`input`: %+v", input)
+ log.Debug().Msgf("`OpenAIRequest`: %+v", request)
- config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
+ traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream)
if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
+ return err
}
- if input.ResponseFormat.Type == "json_object" {
- input.Grammar = grammar.JSONBNF
- }
+ if request.Stream {
+ log.Debug().Msgf("Completion Stream request received")
- config.Grammar = input.Grammar
-
- log.Debug().Msgf("Parameter Config: %+v", config)
-
- if input.Stream {
- log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
//c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
- }
-
- templateFile := ""
-
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
- templateFile = config.Model
- }
-
- if config.TemplateConfig.Completion != "" {
- templateFile = config.TemplateConfig.Completion
- }
-
- if input.Stream {
- if len(config.PromptStrings) > 1 {
- return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
- }
-
- predInput := config.PromptStrings[0]
-
- if templateFile != "" {
- templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
- Input: predInput,
- })
- if err == nil {
- predInput = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", predInput)
- }
- }
-
- responses := make(chan schema.OpenAIResponse)
-
- go process(predInput, input, config, ml, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
-
- for ev := range responses {
+ for ev := range tokenChannel {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
- enc.Encode(ev)
+ if ev.Error != nil {
+ log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error)
+ enc.Encode(ev.Error)
+ } else {
+ enc.Encode(ev.Value)
+ }
- log.Debug().Msgf("Sending chunk: %s", buf.String())
+ log.Debug().Msgf("completion streaming sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
@@ -151,55 +80,15 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}))
return nil
}
-
- var result []schema.Choice
-
- totalTokenUsage := backend.TokenUsage{}
-
- for k, i := range config.PromptStrings {
- if templateFile != "" {
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
- SystemPrompt: config.SystemPrompt,
- Input: i,
- })
- if err == nil {
- i = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", i)
- }
- }
-
- r, tokenUsage, err := ComputeChoices(
- input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
- *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
- }, nil)
- if err != nil {
- return err
- }
-
- totalTokenUsage.Prompt += tokenUsage.Prompt
- totalTokenUsage.Completion += tokenUsage.Completion
-
- result = append(result, r...)
+ // TODO: is it correct for this branch to be mutually exclusive with Stream, or do we need to issue both responses?
+ rawResponse := <-finalResultChannel
+ if rawResponse.Error != nil {
+ return rawResponse.Error
}
-
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: result,
- Object: "text_completion",
- Usage: schema.OpenAIUsage{
- PromptTokens: totalTokenUsage.Prompt,
- CompletionTokens: totalTokenUsage.Completion,
- TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
- },
- }
-
- jsonResult, _ := json.Marshal(resp)
+ jsonResult, _ := json.Marshal(rawResponse.Value)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(resp)
+ return c.JSON(rawResponse.Value)
}
}
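
CompletionEndpoint now receives everything from a single service call: a trace (ID plus creation timestamp, allocated once per request rather than once at endpoint construction, which is what the deleted id/created variables outside the handler closure did) and separate channels for the final result and the streamed tokens. A toy sketch of that shape, with all names illustrative:

package main

import "fmt"

// trace pairs the response ID with its creation time, allocated per
// request by the service. Field names are illustrative.
type trace struct {
	ID      string
	Created int
}

type result struct {
	Value string
	Error error
}

// completion is a toy stand-in for oais.Completion: callers read either
// the final channel or the token channel depending on request.Stream.
func completion(stream bool) (trace, <-chan result, <-chan result) {
	final := make(chan result, 1)
	tokens := make(chan result, 2)
	if stream {
		tokens <- result{Value: "tok"}
		close(tokens)
	} else {
		final <- result{Value: "full completion"}
	}
	return trace{ID: "req-1", Created: 1710000000}, final, tokens
}

func main() {
	tr, final, _ := completion(false)
	r := <-final
	fmt.Println(tr.ID, r.Value, r.Error)
}
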
diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go
index 25497095..a33050dd 100644
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -3,92 +3,36 @@ package openai
import (
"encoding/json"
"fmt"
- "time"
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/services"
- "github.com/go-skynet/LocalAI/core/schema"
- model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
- "github.com/google/uuid"
"github.com/rs/zerolog/log"
)
-func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- modelFile, input, err := readRequest(c, ml, appConfig, true)
+ _, request, err := fce.OpenAIRequestFromContext(c, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
+ _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream)
if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
+ return err
}
- log.Debug().Msgf("Parameter Config: %+v", config)
-
- templateFile := ""
-
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
- templateFile = config.Model
+ rawResponse := <-finalResultChannel
+ if rawResponse.Error != nil {
+ return rawResponse.Error
}
- if config.TemplateConfig.Edit != "" {
- templateFile = config.TemplateConfig.Edit
- }
-
- var result []schema.Choice
- totalTokenUsage := backend.TokenUsage{}
-
- for _, i := range config.InputStrings {
- if templateFile != "" {
- templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
- Input: i,
- Instruction: input.Instruction,
- SystemPrompt: config.SystemPrompt,
- })
- if err == nil {
- i = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", i)
- }
- }
-
- r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
- *c = append(*c, schema.Choice{Text: s})
- }, nil)
- if err != nil {
- return err
- }
-
- totalTokenUsage.Prompt += tokenUsage.Prompt
- totalTokenUsage.Completion += tokenUsage.Completion
-
- result = append(result, r...)
- }
-
- id := uuid.New().String()
- created := int(time.Now().Unix())
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: result,
- Object: "edit",
- Usage: schema.OpenAIUsage{
- PromptTokens: totalTokenUsage.Prompt,
- CompletionTokens: totalTokenUsage.Completion,
- TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
- },
- }
-
- jsonResult, _ := json.Marshal(resp)
+ jsonResult, _ := json.Marshal(rawResponse.Value)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(resp)
+ return c.JSON(rawResponse.Value)
}
}
diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index eca34f79..be546991 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -3,14 +3,9 @@ package openai
import (
"encoding/json"
"fmt"
- "time"
"github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/pkg/model"
-
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/google/uuid"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -21,63 +16,25 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/embeddings [post]
-func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- model, input, err := readRequest(c, ml, appConfig, true)
+ _, input, err := fce.OpenAIRequestFromContext(c, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
- if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
+ responseChannel := ebs.Embeddings(input)
+
+ rawResponse := <-responseChannel
+
+ if rawResponse.Error != nil {
+ return rawResponse.Error
}
- log.Debug().Msgf("Parameter Config: %+v", config)
- items := []schema.Item{}
-
- for i, s := range config.InputToken {
- // get the model function to call for the result
- embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig)
- if err != nil {
- return err
- }
-
- embeddings, err := embedFn()
- if err != nil {
- return err
- }
- items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
- }
-
- for i, s := range config.InputStrings {
- // get the model function to call for the result
- embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig)
- if err != nil {
- return err
- }
-
- embeddings, err := embedFn()
- if err != nil {
- return err
- }
- items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
- }
-
- id := uuid.New().String()
- created := int(time.Now().Unix())
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
- Data: items,
- Object: "list",
- }
-
- jsonResult, _ := json.Marshal(resp)
+ jsonResult, _ := json.Marshal(rawResponse.Value)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(resp)
+ return c.JSON(rawResponse.Value)
}
}
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index 9e806b3e..ec3d84da 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -1,50 +1,18 @@
package openai
import (
- "bufio"
- "encoding/base64"
"encoding/json"
"fmt"
- "io"
- "net/http"
- "os"
- "path/filepath"
- "strconv"
- "strings"
- "time"
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/google/uuid"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/backend"
- model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
-func downloadFile(url string) (string, error) {
- // Get the data
- resp, err := http.Get(url)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- // Create the file
- out, err := os.CreateTemp("", "image")
- if err != nil {
- return "", err
- }
- defer out.Close()
-
- // Write the body to file
- _, err = io.Copy(out, resp.Body)
- return out.Name(), err
-}
-
-//
+// https://platform.openai.com/docs/api-reference/images/create
/*
*
@@ -59,186 +27,36 @@ func downloadFile(url string) (string, error) {
*
*/
+
// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
// @Summary Creates an image given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/images/generations [post]
-func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- m, input, err := readRequest(c, ml, appConfig, false)
+ // TODO: Somewhat of a hack. Is there a better place to assign this?
+ if igbs.BaseUrlForGeneratedImages == "" {
+ igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/"
+ }
+ _, request, err := fce.OpenAIRequestFromContext(c, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- if m == "" {
- m = model.StableDiffusionBackend
- }
- log.Debug().Msgf("Loading model: %+v", m)
+ responseChannel := igbs.GenerateImage(request)
+ rawResponse := <-responseChannel
- config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
+ if rawResponse.Error != nil {
+ return rawResponse.Error
+ }
+
+ jsonResult, err := json.Marshal(rawResponse.Value)
if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
+ return err
}
-
- src := ""
- if input.File != "" {
-
- fileData := []byte{}
- // check if input.File is an URL, if so download it and save it
- // to a temporary file
- if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
- out, err := downloadFile(input.File)
- if err != nil {
- return fmt.Errorf("failed downloading file:%w", err)
- }
- defer os.RemoveAll(out)
-
- fileData, err = os.ReadFile(out)
- if err != nil {
- return fmt.Errorf("failed reading file:%w", err)
- }
-
- } else {
- // base 64 decode the file and write it somewhere
- // that we will cleanup
- fileData, err = base64.StdEncoding.DecodeString(input.File)
- if err != nil {
- return err
- }
- }
-
- // Create a temporary file
- outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
- if err != nil {
- return err
- }
- // write the base64 result
- writer := bufio.NewWriter(outputFile)
- _, err = writer.Write(fileData)
- if err != nil {
- outputFile.Close()
- return err
- }
- outputFile.Close()
- src = outputFile.Name()
- defer os.RemoveAll(src)
- }
-
- log.Debug().Msgf("Parameter Config: %+v", config)
-
- switch config.Backend {
- case "stablediffusion":
- config.Backend = model.StableDiffusionBackend
- case "tinydream":
- config.Backend = model.TinyDreamBackend
- case "":
- config.Backend = model.StableDiffusionBackend
- }
-
- sizeParts := strings.Split(input.Size, "x")
- if len(sizeParts) != 2 {
- return fmt.Errorf("invalid value for 'size'")
- }
- width, err := strconv.Atoi(sizeParts[0])
- if err != nil {
- return fmt.Errorf("invalid value for 'size'")
- }
- height, err := strconv.Atoi(sizeParts[1])
- if err != nil {
- return fmt.Errorf("invalid value for 'size'")
- }
-
- b64JSON := false
- if input.ResponseFormat.Type == "b64_json" {
- b64JSON = true
- }
- // src and clip_skip
- var result []schema.Item
- for _, i := range config.PromptStrings {
- n := input.N
- if input.N == 0 {
- n = 1
- }
- for j := 0; j < n; j++ {
- prompts := strings.Split(i, "|")
- positive_prompt := prompts[0]
- negative_prompt := ""
- if len(prompts) > 1 {
- negative_prompt = prompts[1]
- }
-
- mode := 0
- step := config.Step
- if step == 0 {
- step = 15
- }
-
- if input.Mode != 0 {
- mode = input.Mode
- }
-
- if input.Step != 0 {
- step = input.Step
- }
-
- tempDir := ""
- if !b64JSON {
- tempDir = appConfig.ImageDir
- }
- // Create a temporary file
- outputFile, err := os.CreateTemp(tempDir, "b64")
- if err != nil {
- return err
- }
- outputFile.Close()
- output := outputFile.Name() + ".png"
- // Rename the temporary file
- err = os.Rename(outputFile.Name(), output)
- if err != nil {
- return err
- }
-
- baseURL := c.BaseURL()
-
- fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig)
- if err != nil {
- return err
- }
- if err := fn(); err != nil {
- return err
- }
-
- item := &schema.Item{}
-
- if b64JSON {
- defer os.RemoveAll(output)
- data, err := os.ReadFile(output)
- if err != nil {
- return err
- }
- item.B64JSON = base64.StdEncoding.EncodeToString(data)
- } else {
- base := filepath.Base(output)
- item.URL = baseURL + "/generated-images/" + base
- }
-
- result = append(result, *item)
- }
- }
-
- id := uuid.New().String()
- created := int(time.Now().Unix())
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Data: result,
- }
-
- jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
-
// Return the prediction in the response body
- return c.JSON(resp)
+ return c.JSON(rawResponse.Value)
}
}
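
The acknowledged hack above captures the public base URL from the first request that happens to hit the endpoint. A sketch of that lazy assignment, with an illustrative service type; per the TODO, the caveat is that the first caller's Host header may not match later callers:

package main

import "fmt"

// imageService is an illustrative stand-in for the image generation
// backend service referenced above.
type imageService struct {
	BaseUrlForGeneratedImages string
}

// ensureBaseURL mirrors the lazy assignment in the handler: set the base
// URL once, from whichever request arrives first.
func (s *imageService) ensureBaseURL(requestBaseURL string) {
	if s.BaseUrlForGeneratedImages == "" {
		s.BaseUrlForGeneratedImages = requestBaseURL + "/generated-images/"
	}
}

func main() {
	s := &imageService{}
	s.ensureBaseURL("http://localhost:8080")
	s.ensureBaseURL("http://example.com") // ignored: already set
	fmt.Println(s.BaseUrlForGeneratedImages)
}
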
diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go
deleted file mode 100644
index 06e784b7..00000000
--- a/core/http/endpoints/openai/inference.go
+++ /dev/null
@@ -1,55 +0,0 @@
-package openai
-
-import (
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
-
- "github.com/go-skynet/LocalAI/core/schema"
- model "github.com/go-skynet/LocalAI/pkg/model"
-)
-
-func ComputeChoices(
- req *schema.OpenAIRequest,
- predInput string,
- config *config.BackendConfig,
- o *config.ApplicationConfig,
- loader *model.ModelLoader,
- cb func(string, *[]schema.Choice),
- tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
- n := req.N // number of completions to return
- result := []schema.Choice{}
-
- if n == 0 {
- n = 1
- }
-
- images := []string{}
- for _, m := range req.Messages {
- images = append(images, m.StringImages...)
- }
-
- // get the model function to call for the result
- predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
- if err != nil {
- return result, backend.TokenUsage{}, err
- }
-
- tokenUsage := backend.TokenUsage{}
-
- for i := 0; i < n; i++ {
- prediction, err := predFunc()
- if err != nil {
- return result, backend.TokenUsage{}, err
- }
-
- tokenUsage.Prompt += prediction.Usage.Prompt
- tokenUsage.Completion += prediction.Usage.Completion
-
- finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
- cb(finetunedResponse, &result)
-
- //result = append(result, Choice{Text: prediction})
-
- }
- return result, tokenUsage, err
-}
diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go
index 04e611a2..9bb2b2ca 100644
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -1,61 +1,21 @@
package openai
import (
- "regexp"
-
- "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/core/services"
"github.com/gofiber/fiber/v2"
)
-func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
+func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- models, err := ml.ListModels()
- if err != nil {
- return err
- }
- var mm map[string]interface{} = map[string]interface{}{}
-
- dataModels := []schema.OpenAIModel{}
-
- var filterFn func(name string) bool
+ // If blank, no filter is applied.
filter := c.Query("filter")
-
- // If filter is not specified, do not filter the list by model name
- if filter == "" {
- filterFn = func(_ string) bool { return true }
- } else {
- // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
- rxp, err := regexp.Compile(filter)
- if err != nil {
- return err
- }
- filterFn = func(name string) bool {
- return rxp.MatchString(name)
- }
- }
-
// By default, exclude any loose files that are already referenced by a configuration file.
excludeConfigured := c.QueryBool("excludeConfigured", true)
- // Start with the known configurations
- for _, c := range cl.GetAllBackendConfigs() {
- if excludeConfigured {
- mm[c.Model] = nil
- }
-
- if filterFn(c.Name) {
- dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
- }
- }
-
- // Then iterate through the loose files:
- for _, m := range models {
- // And only adds them if they shouldn't be skipped.
- if _, exists := mm[m]; !exists && filterFn(m) {
- dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
- }
+ dataModels, err := lms.ListModels(filter, excludeConfigured)
+ if err != nil {
+ return err
}
return c.JSON(struct {
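
The handler now delegates filtering to ListModelsService. Judging from the deleted code, the filter semantics are: an empty query matches every model, anything else is compiled as a regular expression. A sketch of that helper under those assumptions:

package main

import (
	"fmt"
	"regexp"
)

// buildFilter reproduces the behaviour of the deleted inline code, which
// presumably moved into ListModelsService: an empty filter matches all,
// otherwise the filter string is treated as a regular expression.
func buildFilter(filter string) (func(string) bool, error) {
	if filter == "" {
		return func(string) bool { return true }, nil
	}
	rxp, err := regexp.Compile(filter)
	if err != nil {
		return nil, err
	}
	return rxp.MatchString, nil
}

func main() {
	match, _ := buildFilter("llama.*")
	fmt.Println(match("llama-2-7b"), match("mistral-7b")) // true false
}
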
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
deleted file mode 100644
index 369fb0b8..00000000
--- a/core/http/endpoints/openai/request.go
+++ /dev/null
@@ -1,285 +0,0 @@
-package openai
-
-import (
- "context"
- "encoding/base64"
- "encoding/json"
- "fmt"
- "io"
- "net/http"
- "strings"
-
- "github.com/go-skynet/LocalAI/core/config"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
- model "github.com/go-skynet/LocalAI/pkg/model"
- "github.com/gofiber/fiber/v2"
- "github.com/rs/zerolog/log"
-)
-
-func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
- input := new(schema.OpenAIRequest)
-
- // Get input data from the request body
- if err := c.BodyParser(input); err != nil {
- return "", nil, fmt.Errorf("failed parsing request body: %w", err)
- }
-
- received, _ := json.Marshal(input)
-
- ctx, cancel := context.WithCancel(o.Context)
- input.Context = ctx
- input.Cancel = cancel
-
- log.Debug().Msgf("Request received: %s", string(received))
-
- modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
-
- return modelFile, input, err
-}
-
-// this function check if the string is an URL, if it's an URL downloads the image in memory
-// encodes it in base64 and returns the base64 string
-func getBase64Image(s string) (string, error) {
- if strings.HasPrefix(s, "http") {
- // download the image
- resp, err := http.Get(s)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- // read the image data into memory
- data, err := io.ReadAll(resp.Body)
- if err != nil {
- return "", err
- }
-
- // encode the image data in base64
- encoded := base64.StdEncoding.EncodeToString(data)
-
- // return the base64 string
- return encoded, nil
- }
-
- // if the string instead is prefixed with "data:image/jpeg;base64,", drop it
- if strings.HasPrefix(s, "data:image/jpeg;base64,") {
- return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
- }
- return "", fmt.Errorf("not valid string")
-}
-
-func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
- if input.Echo {
- config.Echo = input.Echo
- }
- if input.TopK != nil {
- config.TopK = input.TopK
- }
- if input.TopP != nil {
- config.TopP = input.TopP
- }
-
- if input.Backend != "" {
- config.Backend = input.Backend
- }
-
- if input.ClipSkip != 0 {
- config.Diffusers.ClipSkip = input.ClipSkip
- }
-
- if input.ModelBaseName != "" {
- config.AutoGPTQ.ModelBaseName = input.ModelBaseName
- }
-
- if input.NegativePromptScale != 0 {
- config.NegativePromptScale = input.NegativePromptScale
- }
-
- if input.UseFastTokenizer {
- config.UseFastTokenizer = input.UseFastTokenizer
- }
-
- if input.NegativePrompt != "" {
- config.NegativePrompt = input.NegativePrompt
- }
-
- if input.RopeFreqBase != 0 {
- config.RopeFreqBase = input.RopeFreqBase
- }
-
- if input.RopeFreqScale != 0 {
- config.RopeFreqScale = input.RopeFreqScale
- }
-
- if input.Grammar != "" {
- config.Grammar = input.Grammar
- }
-
- if input.Temperature != nil {
- config.Temperature = input.Temperature
- }
-
- if input.Maxtokens != nil {
- config.Maxtokens = input.Maxtokens
- }
-
- switch stop := input.Stop.(type) {
- case string:
- if stop != "" {
- config.StopWords = append(config.StopWords, stop)
- }
- case []interface{}:
- for _, pp := range stop {
- if s, ok := pp.(string); ok {
- config.StopWords = append(config.StopWords, s)
- }
- }
- }
-
- if len(input.Tools) > 0 {
- for _, tool := range input.Tools {
- input.Functions = append(input.Functions, tool.Function)
- }
- }
-
- if input.ToolsChoice != nil {
- var toolChoice grammar.Tool
-
- switch content := input.ToolsChoice.(type) {
- case string:
- _ = json.Unmarshal([]byte(content), &toolChoice)
- case map[string]interface{}:
- dat, _ := json.Marshal(content)
- _ = json.Unmarshal(dat, &toolChoice)
- }
- input.FunctionCall = map[string]interface{}{
- "name": toolChoice.Function.Name,
- }
- }
-
- // Decode each request's message content
- index := 0
- for i, m := range input.Messages {
- switch content := m.Content.(type) {
- case string:
- input.Messages[i].StringContent = content
- case []interface{}:
- dat, _ := json.Marshal(content)
- c := []schema.Content{}
- json.Unmarshal(dat, &c)
- for _, pp := range c {
- if pp.Type == "text" {
- input.Messages[i].StringContent = pp.Text
- } else if pp.Type == "image_url" {
- // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
- base64, err := getBase64Image(pp.ImageURL.URL)
- if err == nil {
- input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
- // set a placeholder for each image
- input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
- index++
- } else {
- fmt.Print("Failed encoding image", err)
- }
- }
- }
- }
- }
-
- if input.RepeatPenalty != 0 {
- config.RepeatPenalty = input.RepeatPenalty
- }
-
- if input.FrequencyPenalty != 0 {
- config.FrequencyPenalty = input.FrequencyPenalty
- }
-
- if input.PresencePenalty != 0 {
- config.PresencePenalty = input.PresencePenalty
- }
-
- if input.Keep != 0 {
- config.Keep = input.Keep
- }
-
- if input.Batch != 0 {
- config.Batch = input.Batch
- }
-
- if input.IgnoreEOS {
- config.IgnoreEOS = input.IgnoreEOS
- }
-
- if input.Seed != nil {
- config.Seed = input.Seed
- }
-
- if input.TypicalP != nil {
- config.TypicalP = input.TypicalP
- }
-
- switch inputs := input.Input.(type) {
- case string:
- if inputs != "" {
- config.InputStrings = append(config.InputStrings, inputs)
- }
- case []interface{}:
- for _, pp := range inputs {
- switch i := pp.(type) {
- case string:
- config.InputStrings = append(config.InputStrings, i)
- case []interface{}:
- tokens := []int{}
- for _, ii := range i {
- tokens = append(tokens, int(ii.(float64)))
- }
- config.InputToken = append(config.InputToken, tokens)
- }
- }
- }
-
- // Can be either a string or an object
- switch fnc := input.FunctionCall.(type) {
- case string:
- if fnc != "" {
- config.SetFunctionCallString(fnc)
- }
- case map[string]interface{}:
- var name string
- n, exists := fnc["name"]
- if exists {
- nn, e := n.(string)
- if e {
- name = nn
- }
- }
- config.SetFunctionCallNameString(name)
- }
-
- switch p := input.Prompt.(type) {
- case string:
- config.PromptStrings = append(config.PromptStrings, p)
- case []interface{}:
- for _, pp := range p {
- if s, ok := pp.(string); ok {
- config.PromptStrings = append(config.PromptStrings, s)
- }
- }
- }
-}
-
-func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
- cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
- config.LoadOptionDebug(debug),
- config.LoadOptionThreads(threads),
- config.LoadOptionContextSize(ctx),
- config.LoadOptionF16(f16),
- )
-
- // Set the parameters for the language model prediction
- updateRequestConfig(cfg, input)
-
- return cfg, input, err
-}
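
The deleted updateRequestConfig shows the merge idiom this refactor carries forward: a request field overrides the config value only when it is non-zero (or non-nil for the pointer-typed fields such as TopK, TopP, and Temperature), so omitted JSON fields keep the model's defaults. A minimal sketch of the scalar case:

package main

import "fmt"

type backendConfig struct {
	FrequencyPenalty float64
	PresencePenalty  float64
}

type openAIRequest struct {
	FrequencyPenalty float64
	PresencePenalty  float64
}

// merge applies the non-zero-override idiom from updateRequestConfig.
func merge(cfg *backendConfig, req *openAIRequest) {
	if req.FrequencyPenalty != 0 {
		cfg.FrequencyPenalty = req.FrequencyPenalty
	}
	if req.PresencePenalty != 0 {
		cfg.PresencePenalty = req.PresencePenalty
	}
}

func main() {
	cfg := &backendConfig{FrequencyPenalty: 0.5}
	merge(cfg, &openAIRequest{PresencePenalty: 1.1})
	fmt.Printf("%+v\n", *cfg) // {FrequencyPenalty:0.5 PresencePenalty:1.1}
}

The downside of the scalar form is that a client cannot explicitly request a zero value; the pointer-typed fields in the same function avoid that ambiguity.
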
diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index c7dd39e7..572cec12 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -9,8 +9,7 @@ import (
"path/filepath"
"github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -23,17 +22,15 @@ import (
// @Param file formData file true "file"
// @Success 200 {object} map[string]string "Response"
// @Router /v1/audio/transcriptions [post]
-func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- m, input, err := readRequest(c, ml, appConfig, false)
+ _, request, err := fce.OpenAIRequestFromContext(c, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
- if err != nil {
- return fmt.Errorf("failed reading parameters from request:%w", err)
- }
+ // TODO: Investigate this file-copy logic later; it potentially belongs in the service layer.
+
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
@@ -65,13 +62,16 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
log.Debug().Msgf("Audio file copied to: %+v", dst)
- tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
- if err != nil {
- return err
- }
+ request.File = dst
- log.Debug().Msgf("Trascribed: %+v", tr)
+ responseChannel := tbs.Transcribe(request)
+ rawResponse := <-responseChannel
+
+ if rawResponse.Error != nil {
+ return rawResponse.Error
+ }
+ log.Debug().Msgf("Transcribed: %+v", rawResponse.Value)
// TODO: handle different outputs here
- return c.Status(http.StatusOK).JSON(tr)
+ return c.Status(http.StatusOK).JSON(rawResponse.Value)
}
}
diff --git a/core/schema/whisper.go b/core/schema/transcription.go
similarity index 90%
rename from core/schema/whisper.go
rename to core/schema/transcription.go
index 41413c1f..fe1799fa 100644
--- a/core/schema/whisper.go
+++ b/core/schema/transcription.go
@@ -10,7 +10,7 @@ type Segment struct {
Tokens []int `json:"tokens"`
}
-type Result struct {
+type TranscriptionResult struct {
Segments []Segment `json:"segments"`
Text string `json:"text"`
}
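
The rename from Result to TranscriptionResult is mechanical, but the wire shape is worth pinning down. An abbreviated local copy of the two structs (only the fields visible in this hunk; the real Segment carries more) and the JSON they produce:

package main

import (
	"encoding/json"
	"fmt"
)

// Abbreviated copies of the renamed schema types, for illustration only.
type Segment struct {
	Tokens []int `json:"tokens"`
}

type TranscriptionResult struct {
	Segments []Segment `json:"segments"`
	Text     string    `json:"text"`
}

func main() {
	r := TranscriptionResult{
		Segments: []Segment{{Tokens: []int{7, 42}}},
		Text:     "hello world",
	}
	b, _ := json.Marshal(r)
	fmt.Println(string(b))
	// {"segments":[{"tokens":[7,42]}],"text":"hello world"}
}
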
diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go
index 979a67a3..a610432c 100644
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -15,22 +15,22 @@ import (
gopsutil "github.com/shirou/gopsutil/v3/process"
)
-type BackendMonitor struct {
+type BackendMonitorService struct {
configLoader *config.BackendConfigLoader
modelLoader *model.ModelLoader
options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
}
-func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
- return BackendMonitor{
+func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService {
+ return &BackendMonitorService{
configLoader: configLoader,
modelLoader: modelLoader,
options: appConfig,
}
}
-func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
- config, exists := bm.configLoader.GetBackendConfig(modelName)
+func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) {
+ config, exists := bms.configLoader.GetBackendConfig(modelName)
var backendId string
if exists {
backendId = config.Model
@@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string
return backendId, nil
}
-func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
- config, exists := bm.configLoader.GetBackendConfig(model)
+func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
+ config, exists := bms.configLoader.GetBackendConfig(model)
var backend string
if exists {
backend = config.Model
@@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
backend = fmt.Sprintf("%s.bin", backend)
}
- pid, err := bm.modelLoader.GetGRPCPID(backend)
+ pid, err := bms.modelLoader.GetGRPCPID(backend)
if err != nil {
log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
@@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
}, nil
}
-func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
- backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
+ backendId, err := bms.getModelLoaderIDFromModelName(modelName)
if err != nil {
return nil, err
}
- modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
+ modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
if modelAddr == "" {
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
}
@@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
if rpcErr != nil {
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
- val, slbErr := bm.SampleLocalBackendProcess(backendId)
+ val, slbErr := bms.SampleLocalBackendProcess(backendId)
if slbErr != nil {
return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
}
@@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
return status, nil
}
-func (bm BackendMonitor) ShutdownModel(modelName string) error {
- backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) ShutdownModel(modelName string) error {
+ backendId, err := bms.getModelLoaderIDFromModelName(modelName)
if err != nil {
return err
}
- return bm.modelLoader.ShutdownModel(backendId)
+ return bms.modelLoader.ShutdownModel(backendId)
}
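
One thing the rename leaves untouched: NewBackendMonitorService now returns a pointer, yet the methods still mix value receivers (CheckAndSample, ShutdownModel) with a pointer receiver (SampleLocalBackendProcess). That appears harmless here since none of them mutate the struct, but the distinction matters in general:

package main

import "fmt"

type counter struct{ n int }

// Value receiver: the method operates on a copy, so the increment is lost.
func (c counter) bumpByValue() { c.n++ }

// Pointer receiver: the increment persists on the caller's value.
func (c *counter) bumpByPointer() { c.n++ }

func main() {
	c := &counter{}
	c.bumpByValue()
	c.bumpByPointer()
	fmt.Println(c.n) // 1
}
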
diff --git a/core/services/gallery.go b/core/services/gallery.go
index b068abbb..1ef8e3e2 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -3,14 +3,18 @@ package services
import (
"context"
"encoding/json"
+ "errors"
"os"
+ "path/filepath"
"strings"
"sync"
"github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/embedded"
+ "github.com/go-skynet/LocalAI/pkg/downloader"
"github.com/go-skynet/LocalAI/pkg/gallery"
- "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
@@ -29,18 +33,6 @@ func NewGalleryService(modelPath string) *GalleryService {
}
}
-func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error {
-
- config, err := gallery.GetGalleryConfigFromURL(req.URL)
- if err != nil {
- return err
- }
-
- config.Files = append(config.Files, req.AdditionalFiles...)
-
- return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
-}
-
func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) {
g.Lock()
defer g.Unlock()
@@ -92,10 +84,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
}
} else if op.ConfigURL != "" {
- startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
+ PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
err = cl.Preload(g.modelPath)
} else {
- err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
+ err = prepareModel(g.modelPath, op.Req, progressCallback)
}
if err != nil {
@@ -127,13 +119,12 @@ type galleryModel struct {
ID string `json:"id"`
}
-func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
+func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error {
var err error
for _, r := range requests {
utils.ResetDownloadTimers()
if r.ID == "" {
- err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
-
+ err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
} else {
if strings.Contains(r.ID, "@") {
err = gallery.InstallModelFromGallery(
@@ -158,7 +149,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g
return err
}
- return processRequests(modelPath, s, cl, galleries, requests)
+ return processRequests(modelPath, galleries, requests)
}
func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
@@ -168,5 +159,90 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader,
return err
}
- return processRequests(modelPath, s, cl, galleries, requests)
+ return processRequests(modelPath, galleries, requests)
+}
+
+// PreloadModelsConfigurations will preload models from the given list of URLs.
+// It will download the model if it is not already present in the model path.
+// It will also try to resolve the model as an embedded model YAML configuration.
+func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) {
+ for _, url := range models {
+
+ // As a best effort, try to resolve the model from the remote library
+ // if it's not resolved we try with the other method below
+ if modelLibraryURL != "" {
+ lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+ if err == nil {
+ if lib[url] != "" {
+ log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+ url = lib[url]
+ }
+ }
+ }
+
+ url = embedded.ModelShortURL(url)
+ switch {
+ case embedded.ExistsInModelsLibrary(url):
+ modelYAML, err := embedded.ResolveContent(url)
+ // If we resolve something, just save it to disk and continue
+ if err != nil {
+ log.Error().Err(err).Msg("error resolving model content")
+ continue
+ }
+
+ log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+ md5Name := utils.MD5(url)
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+ }
+ case downloader.LooksLikeURL(url):
+ log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+ // md5 of model name
+ md5Name := utils.MD5(url)
+
+ // check if file exists
+ if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+ utils.DisplayDownloadFunction(fileName, current, total, percent)
+ })
+ if err != nil {
+ log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+ }
+ }
+ default:
+ if _, err := os.Stat(url); err == nil {
+ log.Debug().Msgf("[startup] resolved local model: %s", url)
+ // copy to modelPath
+ md5Name := utils.MD5(url)
+
+ modelYAML, err := os.ReadFile(url)
+ if err != nil {
+ log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+ continue
+ }
+
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
+ }
+ } else {
+ log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+ }
+ }
+ }
+}
+
+func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error {
+
+ config, err := gallery.GetGalleryConfigFromURL(req.URL)
+ if err != nil {
+ return err
+ }
+
+ config.Files = append(config.Files, req.AdditionalFiles...)
+
+ return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
}
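
Since prepareModel loses its unused *config.BackendConfigLoader parameter and PreloadModelsConfigurations moves here from pkg/startup, a usage sketch of the relocated helper may help review. The model path and both entries are illustrative, not taken from this patch:

// Sketch: resolve two hypothetical entries into /models.
// An embedded-library name is written out as an MD5-named .yaml definition;
// a URL is downloaded to the same scheme if not already present on disk.
services.PreloadModelsConfigurations(
	"",        // no remote library of shorteners
	"/models", // model path (illustrative)
	"phi-2",   // hypothetical embedded model name
	"https://example.com/configs/mymodel.yaml", // hypothetical config URL
)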
diff --git a/core/services/list_models.go b/core/services/list_models.go
new file mode 100644
index 00000000..a21e6faf
--- /dev/null
+++ b/core/services/list_models.go
@@ -0,0 +1,72 @@
+package services
+
+import (
+ "regexp"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+type ListModelsService struct {
+ bcl *config.BackendConfigLoader
+ ml *model.ModelLoader
+ appConfig *config.ApplicationConfig
+}
+
+func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService {
+ return &ListModelsService{
+ bcl: bcl,
+ ml: ml,
+ appConfig: appConfig,
+ }
+}
+
+func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
+
+ models, err := lms.ml.ListModels()
+ if err != nil {
+ return nil, err
+ }
+
+ mm := map[string]interface{}{}
+
+ dataModels := []schema.OpenAIModel{}
+
+ var filterFn func(name string) bool
+
+ // If filter is not specified, do not filter the list by model name
+ if filter == "" {
+ filterFn = func(_ string) bool { return true }
+ } else {
+ // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
+ rxp, err := regexp.Compile(filter)
+ if err != nil {
+ return nil, err
+ }
+ filterFn = func(name string) bool {
+ return rxp.MatchString(name)
+ }
+ }
+
+ // Start with the known configurations
+ for _, c := range lms.bcl.GetAllBackendConfigs() {
+ if excludeConfigured {
+ mm[c.Model] = nil
+ }
+
+ if filterFn(c.Name) {
+ dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
+ }
+ }
+
+ // Then iterate through the loose files:
+ for _, m := range models {
+ // Add them only if they shouldn't be skipped.
+ if _, exists := mm[m]; !exists && filterFn(m) {
+ dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
+ }
+ }
+
+ return dataModels, nil
+}
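
A brief usage sketch of the new service, assuming lms is the *services.ListModelsService created in createApplication; the filter value is illustrative:

// Sketch: list configured models and loose files whose name matches "^gpt",
// hiding loose files that already have a configuration entry.
models, err := lms.ListModels("^gpt", true)
if err != nil {
	return err // an invalid filter regex surfaces here
}
for _, m := range models {
	fmt.Println(m.ID) // each entry is a schema.OpenAIModel{ID: ..., Object: "model"}
}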
diff --git a/pkg/startup/model_preload_test.go b/core/services/model_preload_test.go
similarity index 96%
rename from pkg/startup/model_preload_test.go
rename to core/services/model_preload_test.go
index 63a8f8b0..fc65d565 100644
--- a/pkg/startup/model_preload_test.go
+++ b/core/services/model_preload_test.go
@@ -1,13 +1,14 @@
-package startup_test
+package services_test
import (
"fmt"
"os"
"path/filepath"
- . "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/go-skynet/LocalAI/pkg/utils"
+ . "github.com/go-skynet/LocalAI/core/services"
+
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
diff --git a/core/services/openai.go b/core/services/openai.go
new file mode 100644
index 00000000..0f61d6f4
--- /dev/null
+++ b/core/services/openai.go
@@ -0,0 +1,805 @@
+package services
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
+ "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/google/uuid"
+ "github.com/imdario/mergo"
+ "github.com/rs/zerolog/log"
+)
+
+type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration
+
+type endpointConfiguration struct {
+ SchemaObject string
+ TemplatePath string
+ TemplateData model.PromptTemplateData
+ ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice
+ CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse]
+ TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse]
+}
+
+// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later.
+func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
+ if resp.Error != nil || resp.Value == nil {
+ return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
+ }
+ return concurrency.ErrorOr[*schema.OpenAIResponse]{
+ Value: &schema.OpenAIResponse{
+ Choices: []schema.Choice{
+ {
+ Text: resp.Value.Response,
+ },
+ },
+ Usage: schema.OpenAIUsage{
+ PromptTokens: resp.Value.Usage.Prompt,
+ CompletionTokens: resp.Value.Usage.Completion,
+ TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
+ },
+ },
+ }
+}
+
+// TODO: Consider alternative names for this.
+// The purpose of this struct is to hold a reference to the OpenAI request context information
+// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it
+type OpenAIRequestTraceID struct {
+ ID string
+ Created int
+}
+
+// This type was split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code separate from the OpenAI-specific higher level functionality
+type OpenAIService struct {
+ bcl *config.BackendConfigLoader
+ ml *model.ModelLoader
+ appConfig *config.ApplicationConfig
+ llmbs *backend.LLMBackendService
+}
+
+func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService {
+ return &OpenAIService{
+ bcl: bcl,
+ ml: ml,
+ appConfig: appConfig,
+ llmbs: llmbs,
+ }
+}
+
+// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE???
+func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) {
+ return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig)
+}
+
+// TODO: It would be a lot less messy to make a return struct that had references to each of these channels
+// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()?
+// finalResultsChannel is the primary async return path: one result for the entire request.
+// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing it for testing? One LLMResponseBundle per PromptString? Gets all N completions for a single prompt.
+// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request". The Request is available to attempt tracing???
+// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens!
+func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+ traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+ completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+ return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
+ return endpointConfiguration{
+ SchemaObject: "text_completion",
+ TemplatePath: bc.TemplateConfig.Completion,
+ TemplateData: model.PromptTemplateData{
+ SystemPrompt: bc.SystemPrompt,
+ },
+ ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
+ return schema.Choice{
+ Index: promptIndex,
+ FinishReason: "stop",
+ Text: resp.Response,
+ }
+ },
+ CompletionMappingFn: simpleMapper,
+ TokenMappingFn: simpleMapper,
+ }
+ }, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+ traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+ completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+ return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
+
+ return endpointConfiguration{
+ SchemaObject: "edit",
+ TemplatePath: bc.TemplateConfig.Edit,
+ TemplateData: model.PromptTemplateData{
+ SystemPrompt: bc.SystemPrompt,
+ Instruction: request.Instruction,
+ },
+ ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
+ return schema.Choice{
+ Index: promptIndex,
+ FinishReason: "stop",
+ Text: resp.Response,
+ }
+ },
+ CompletionMappingFn: simpleMapper,
+ TokenMappingFn: simpleMapper,
+ }
+ }, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+ traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
+ completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+ return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
+ traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+ completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+ if initialTraceID == nil {
+ traceID = &OpenAIRequestTraceID{
+ ID: uuid.New().String(),
+ Created: int(time.Now().Unix()),
+ }
+ } else {
+ traceID = initialTraceID
+ }
+
+ bc, request, err := oais.getConfig(request)
+ if err != nil {
+ log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+ return
+ }
+
+ if request.ResponseFormat.Type == "json_object" {
+ request.Grammar = grammar.JSONBNF
+ }
+
+ bc.Grammar = request.Grammar
+
+ if request.Stream && len(bc.PromptStrings) > 1 {
+ log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?")
+ }
+
+ rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ finalResultChannel = rawFinalResultChannel
+ promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{}
+ var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+ var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+ if notifyOnPromptResult {
+ rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ }
+ if notifyOnToken {
+ rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ }
+
+ promptResultsChannelLock := sync.Mutex{}
+
+ endpointConfig := endpointConfigFn(bc, request)
+
+ if len(endpointConfig.TemplatePath) == 0 {
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+ endpointConfig.TemplatePath = bc.Model
+ } else {
+ log.Warn().Msgf("failed to find any template for %+v", request)
+ }
+ }
+
+ setupWG := sync.WaitGroup{}
+ var prompts []string
+ if lPS := len(bc.PromptStrings); lPS > 0 {
+ setupWG.Add(lPS)
+ prompts = bc.PromptStrings
+ } else {
+ setupWG.Add(len(bc.InputStrings))
+ prompts = bc.InputStrings
+ }
+
+ var setupError error
+
+ for pI, p := range prompts {
+
+ go func(promptIndex int, prompt string) {
+ if endpointConfig.TemplatePath != "" {
+ promptTemplateData := model.PromptTemplateData{
+ Input: prompt,
+ }
+ err := mergo.Merge(&promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride)
+ if err == nil {
+ templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData)
+ if err == nil {
+ prompt = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", prompt)
+ }
+ }
+ }
+
+ log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt)
+ promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc,
+ func(r *backend.LLMResponse) schema.Choice {
+ return endpointConfig.ResultMappingFn(r, promptIndex)
+ }, notifyOnPromptResult, notifyOnToken)
+ if err != nil {
+ log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err)
+ promptResultsChannelLock.Lock()
+ setupError = errors.Join(setupError, err)
+ promptResultsChannelLock.Unlock()
+ setupWG.Done()
+ return
+ }
+ if notifyOnPromptResult {
+ concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true)
+ }
+ if notifyOnToken {
+ concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true)
+ }
+ promptResultsChannelLock.Lock()
+ promptResultsChannels = append(promptResultsChannels, promptResultsChannel)
+ promptResultsChannelLock.Unlock()
+ setupWG.Done()
+ }(pI, p)
+
+ }
+ setupWG.Wait()
+
+ // If any of the setup goroutines experienced an error, quit early here.
+ if setupError != nil {
+ go func() {
+ log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError)
+ rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
+ close(rawFinalResultChannel)
+ }()
+ return
+ }
+
+ initialResponse := &schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model,
+ Object: endpointConfig.SchemaObject,
+ Usage: schema.OpenAIUsage{},
+ }
+
+ // utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) {
+ concurrency.SliceOfChannelsReducer(
+ promptResultsChannels, rawFinalResultChannel,
+ func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
+
+ if iv.Error != nil {
+ result.Error = iv.Error
+ return result
+ }
+ result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt
+ result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion
+ result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens
+
+ result.Value.Choices = append(result.Value.Choices, iv.Value.Response...)
+
+ return result
+ }, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true)
+
+ completionsChannel = rawCompletionsChannel
+ tokenChannel = rawTokenChannel
+
+ return
+}
+
+// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes
+// this is not a final decision -- just a reality of moving a lot of parts at once
+// This has _become_ Chat, which wasn't the goal... More cleanup in the future once it's stable?
+func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
+ traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
+ completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+ if initialTraceID == nil {
+ traceID = &OpenAIRequestTraceID{
+ ID: uuid.New().String(),
+ Created: int(time.Now().Unix()),
+ }
+ } else {
+ traceID = initialTraceID
+ }
+
+ bc, request, err := oais.getConfig(request)
+ if err != nil {
+ return
+ }
+
+ // Allow the user to set custom actions via config file
+ // to be "embedded" in each model
+ noActionName := "answer"
+ noActionDescription := "use this action to answer without performing any action"
+
+ if bc.FunctionsConfig.NoActionFunctionName != "" {
+ noActionName = bc.FunctionsConfig.NoActionFunctionName
+ }
+ if bc.FunctionsConfig.NoActionDescriptionName != "" {
+ noActionDescription = bc.FunctionsConfig.NoActionDescriptionName
+ }
+
+ if request.ResponseFormat.Type == "json_object" {
+ request.Grammar = grammar.JSONBNF
+ }
+
+ bc.Grammar = request.Grammar
+
+ processFunctions := false
+ funcs := grammar.Functions{}
+ // process functions if we have any defined or if we have a function call string
+ if len(request.Functions) > 0 && bc.ShouldUseFunctions() {
+ log.Debug().Msgf("Response needs to process functions")
+
+ processFunctions = true
+
+ noActionGrammar := grammar.Function{
+ Name: noActionName,
+ Description: noActionDescription,
+ Parameters: map[string]interface{}{
+ "properties": map[string]interface{}{
+ "message": map[string]interface{}{
+ "type": "string",
+ "description": "The message to reply the user with",
+ }},
+ },
+ }
+
+ // Append the no action function
+ funcs = append(funcs, request.Functions...)
+ if !bc.FunctionsConfig.DisableNoAction {
+ funcs = append(funcs, noActionGrammar)
+ }
+
+ // Force picking one of the functions by the request
+ if bc.FunctionToCall() != "" {
+ funcs = funcs.Select(bc.FunctionToCall())
+ }
+
+ // Update input grammar
+ jsStruct := funcs.ToJSONStructure()
+ bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls)
+ } else if request.JSONFunctionGrammarObject != nil {
+ bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls)
+ }
+
+ if request.Stream && processFunctions {
+ log.Warn().Msg("Streaming + Functions is highly experimental in this version")
+ }
+
+ var predInput string
+
+ if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions {
+
+ suppressConfigSystemPrompt := false
+ mess := []string{}
+ for messageIndex, i := range request.Messages {
+ var content string
+ role := i.Role
+
+ // if this is a function call, we might want to customize the role so we can better display that the "assistant called a json action"
+ // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
+ if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
+ roleFn := "assistant_function_call"
+ r := bc.Roles[roleFn]
+ if r != "" {
+ role = roleFn
+ }
+ }
+ r := bc.Roles[role]
+ contentExists := i.Content != nil && i.StringContent != ""
+
+ fcall := i.FunctionCall
+ if len(i.ToolCalls) > 0 {
+ fcall = i.ToolCalls
+ }
+
+ // First attempt to populate content via a chat message specific template
+ if bc.TemplateConfig.ChatMessage != "" {
+ chatMessageData := model.ChatMessageTemplateData{
+ SystemPrompt: bc.SystemPrompt,
+ Role: r,
+ RoleName: role,
+ Content: i.StringContent,
+ FunctionCall: fcall,
+ FunctionName: i.Name,
+ LastMessage: messageIndex == (len(request.Messages) - 1),
+ Function: bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)),
+ MessageIndex: messageIndex,
+ }
+ templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData)
+ if err != nil {
+ log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err)
+ } else {
+ if templatedChatMessage == "" {
+ log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData)
+ continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
+ }
+ log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
+ content = templatedChatMessage
+ }
+ }
+ marshalAnyRole := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + fmt.Sprint(r, " ", string(j))
+ } else {
+ content = fmt.Sprint(r, " ", string(j))
+ }
+ }
+ }
+ marshalAny := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + string(j)
+ } else {
+ content = string(j)
+ }
+ }
+ }
+ // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
+ if content == "" {
+ if r != "" {
+ if contentExists {
+ content = fmt.Sprint(r, i.StringContent)
+ }
+
+ if i.FunctionCall != nil {
+ marshalAnyRole(i.FunctionCall)
+ }
+ } else {
+ if contentExists {
+ content = fmt.Sprint(i.StringContent)
+ }
+
+ if i.FunctionCall != nil {
+ marshalAny(i.FunctionCall)
+ }
+
+ if i.ToolCalls != nil {
+ marshalAny(i.ToolCalls)
+ }
+ }
+ // Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+ if contentExists && role == "system" {
+ suppressConfigSystemPrompt = true
+ }
+ }
+
+ mess = append(mess, content)
+ }
+
+ predInput = strings.Join(mess, "\n")
+
+ log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+ templateFile := ""
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+ templateFile = bc.Model
+ }
+
+ if bc.TemplateConfig.Chat != "" && !processFunctions {
+ templateFile = bc.TemplateConfig.Chat
+ }
+
+ if bc.TemplateConfig.Functions != "" && processFunctions {
+ templateFile = bc.TemplateConfig.Functions
+ }
+
+ if templateFile != "" {
+ templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+ SystemPrompt: bc.SystemPrompt,
+ SuppressSystemPrompt: suppressConfigSystemPrompt,
+ Input: predInput,
+ Functions: funcs,
+ })
+ if err == nil {
+ predInput = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", predInput)
+ } else {
+ log.Debug().Msgf("Template failed loading: %s", err.Error())
+ }
+ }
+ }
+ log.Debug().Msgf("Prompt (after templating): %s", predInput)
+ if processFunctions {
+ log.Debug().Msgf("Grammar: %+v", bc.Grammar)
+ }
+
+ rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+ var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+ if notifyOnPromptResult {
+ rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ }
+ if notifyOnToken {
+ rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+ }
+
+ rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice {
+ return schema.Choice{
+ Index: 0, // ???
+ FinishReason: "stop",
+ Message: &schema.Message{
+ Role: "assistant",
+ Content: resp.Response,
+ },
+ }
+ }, notifyOnPromptResult, notifyOnToken)
+
+ chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
+ if resp.Error != nil || resp.Value == nil {
+ return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
+ }
+ return concurrency.ErrorOr[*schema.OpenAIResponse]{
+ Value: &schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{
+ {
+ Delta: &schema.Message{
+ Role: "assistant",
+ Content: resp.Value.Response,
+ },
+ Index: 0,
+ },
+ },
+ Object: "chat.completion.chunk",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: resp.Value.Usage.Prompt,
+ CompletionTokens: resp.Value.Usage.Completion,
+ TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
+ },
+ },
+ }
+ }
+
+ if notifyOnPromptResult {
+ concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true)
+ }
+ if notifyOnToken {
+ concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true)
+ }
+
+ go func() {
+ rawResult := <-rawResultChannel
+ if rawResult.Error != nil {
+ log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error)
+ return
+ }
+ llmResponseChoices := rawResult.Value.Response
+
+ if processFunctions && len(llmResponseChoices) > 1 {
+ log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices))
+ log.Debug().Msgf("%+v", llmResponseChoices)
+ }
+
+ for _, result := range rawResult.Value.Response {
+ // If no functions, just return the raw result.
+ if !processFunctions {
+
+ resp := schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{result},
+ Object: "chat.completion.chunk",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: rawResult.Value.Usage.Prompt,
+ CompletionTokens: rawResult.Value.Usage.Completion,
+ TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+ },
+ }
+
+ rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+ continue
+ }
+ // At this point, things are function specific!
+
+ // Oh no this can't be the right way to do this... but it works. Save us, mudler!
+ fString := fmt.Sprintf("%s", result.Message.Content)
+ results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls)
+ noActionToRun := (len(results) > 0 && results[0].name == noActionName)
+
+ if noActionToRun {
+ log.Debug().Msg("-- noActionToRun branch --")
+ initialMessage := schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}},
+ Object: "stop",
+ }
+ rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+
+ result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput)
+ if err != nil {
+ log.Error().Msgf("error handling question: %s", err.Error())
+ return
+ }
+
+ resp := schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+ Object: "chat.completion.chunk",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: rawResult.Value.Usage.Prompt,
+ CompletionTokens: rawResult.Value.Usage.Completion,
+ TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+ },
+ }
+
+ rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+ } else {
+ log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results)
+ for i, ss := range results {
+ name, args := ss.name, ss.arguments
+
+ initialMessage := schema.OpenAIResponse{
+ ID: traceID.ID,
+ Created: traceID.Created,
+ Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{
+ FinishReason: "function_call",
+ Message: &schema.Message{
+ Role: "assistant",
+ ToolCalls: []schema.ToolCall{
+ {
+ Index: i,
+ ID: traceID.ID,
+ Type: "function",
+ FunctionCall: schema.FunctionCall{
+ Name: name,
+ Arguments: args,
+ },
+ },
+ },
+ }}},
+ Object: "chat.completion.chunk",
+ }
+ rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+ }
+ }
+ }
+
+ close(rawFinalResultChannel)
+ }()
+
+ finalResultChannel = rawFinalResultChannel
+ completionsChannel = rawCompletionsChannel
+ tokenChannel = rawTokenChannel
+ return
+}
+
+func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) {
+ log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply")
+
+ // If there is a message that the LLM already sends as part of the JSON reply, use it
+ arguments := map[string]interface{}{}
+ json.Unmarshal([]byte(args), &arguments)
+ m, exists := arguments["message"]
+ if exists {
+ switch message := m.(type) {
+ case string:
+ if message != "" {
+ log.Debug().Msgf("Reply received from LLM: %s", message)
+ message = oais.llmbs.Finetune(*config, prompt, message)
+ log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
+
+ return message, nil
+ }
+ }
+ }
+
+ log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
+ // Otherwise ask the LLM to understand the JSON output and the context, and return a message
+ // Note: This costs (in term of CPU/GPU) another computation
+ config.Grammar = ""
+ images := []string{}
+ for _, m := range input.Messages {
+ images = append(images, m.StringImages...)
+ }
+
+ resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{
+ Text: prompt,
+ Images: images,
+ RawMessages: input.Messages, // Experimental
+ }, config, false)
+
+ if err != nil {
+ log.Error().Msgf("inference setup error: %s", err.Error())
+ return "", err
+ }
+
+ raw := <-resultChannel
+ if raw.Error != nil {
+ log.Error().Msgf("inference error: %q", raw.Error.Error())
+ return "", err
+ }
+ if raw.Value == nil {
+ log.Warn().Msgf("nil inference response")
+ return "", nil
+ }
+ return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil
+}
+
+type funcCallResults struct {
+ name string
+ arguments string
+}
+
+func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
+
+ results := []funcCallResults{}
+
+ // TODO: use generics to avoid this code duplication
+ if multipleResults {
+ ss := []map[string]interface{}{}
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+
+ for _, s := range ss {
+ func_name, ok := s["function"]
+ if !ok {
+ continue
+ }
+ args, ok := s["arguments"]
+ if !ok {
+ continue
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ continue
+ }
+ results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+ }
+ } else {
+ // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+ ss := map[string]interface{}{}
+ // This prevents newlines from breaking JSON parsing for clients
+ // s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(llmresult), &ss)
+
+ // The grammar defines the function name as "function", while OpenAI returns "name"
+ func_name, ok := ss["function"]
+ if !ok {
+ log.Debug().Msg("ss[function] is not OK!")
+ return results
+ }
+ // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+ args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+ if !ok {
+ log.Debug().Msg("ss[arguments] is not OK!")
+ return results
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ log.Debug().Msgf("unexpected func_name: %+v", func_name)
+ return results
+ }
+ results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+ }
+ return results
+}
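
Because the channel-based return signature is the least obvious part of this new service, here is a minimal consumption sketch. The request construction is omitted and the variable names are illustrative, but the channel semantics follow the code above:

// Sketch: run a completion, stream tokens, then take the single final result.
traceID, finalCh, _, _, tokenCh, err := oais.Completion(req, false, true)
if err != nil {
	return err
}
log.Debug().Msgf("request %s accepted", traceID.ID)
for tok := range tokenCh { // one ErrorOr[*schema.OpenAIResponse] per token
	if tok.Error == nil {
		fmt.Print(tok.Value.Choices[0].Text)
	}
}
final := <-finalCh // the reducer emits exactly one result, then closes
if final.Error != nil {
	return final.Error
}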
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 6298f034..92ccaa9d 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -4,17 +4,21 @@ import (
"fmt"
"os"
+ "github.com/go-skynet/LocalAI/core"
+ "github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
+ openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up.
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
- pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
+ "github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
-func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+func Startup(opts ...config.AppOption) (*core.Application, error) {
options := config.NewApplicationConfig(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel)
@@ -27,68 +31,75 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
// Make sure directories exists
if options.ModelPath == "" {
- return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
+ return nil, fmt.Errorf("options.ModelPath cannot be empty")
}
err := os.MkdirAll(options.ModelPath, 0755)
if err != nil {
- return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
+ return nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.ImageDir != "" {
err := os.MkdirAll(options.ImageDir, 0755)
if err != nil {
- return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
+ return nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.AudioDir != "" {
err := os.MkdirAll(options.AudioDir, 0755)
if err != nil {
- return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
+ return nil, fmt.Errorf("unable to create AudioDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0755)
if err != nil {
- return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
+ return nil, fmt.Errorf("unable to create UploadDir: %q", err)
+ }
+ }
+ if options.ConfigsDir != "" {
+ err := os.MkdirAll(options.ConfigsDir, 0755)
+ if err != nil {
+ return nil, fmt.Errorf("unable to create ConfigsDir: %q", err)
}
}
- //
- pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
+ // Load config jsons
+ utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles)
+ utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants)
+ utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles)
- cl := config.NewBackendConfigLoader()
- ml := model.NewModelLoader(options.ModelPath)
+ app := createApplication(options)
- configLoaderOpts := options.ToConfigLoaderOptions()
+ services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
- if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
+ if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
log.Error().Err(err).Msg("error loading config files")
}
if options.ConfigFile != "" {
- if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
+ if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
log.Error().Err(err).Msg("error loading config file")
}
}
- if err := cl.Preload(options.ModelPath); err != nil {
+ if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil {
log.Error().Err(err).Msg("error downloading models")
}
if options.PreloadJSONModels != "" {
- if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
- return nil, nil, nil, err
+ if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil {
+ return nil, err
}
}
if options.PreloadModelsFromPath != "" {
- if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
- return nil, nil, nil, err
+ if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil {
+ return nil, err
}
}
if options.Debug {
- for _, v := range cl.ListBackendConfigs() {
- cfg, _ := cl.GetBackendConfig(v)
+ for _, v := range app.BackendConfigLoader.ListBackendConfigs() {
+ cfg, _ := app.BackendConfigLoader.GetBackendConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
}
}
@@ -106,17 +117,17 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
- ml.StopAllGRPC()
+ app.ModelLoader.StopAllGRPC()
}()
if options.WatchDog {
wd := model.NewWatchDog(
- ml,
+ app.ModelLoader,
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
- ml.SetWatchDog(wd)
+ app.ModelLoader.SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
@@ -126,5 +137,35 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
}
log.Info().Msg("core/startup process completed!")
- return cl, ml, options, nil
+ return app, nil
+}
+
+// In lieu of a proper DI framework, this function wires up the Application manually.
+// This is in core/startup rather than core/state.go to keep package references clean!
+func createApplication(appConfig *config.ApplicationConfig) *core.Application {
+ app := &core.Application{
+ ApplicationConfig: appConfig,
+ BackendConfigLoader: config.NewBackendConfigLoader(),
+ ModelLoader: model.NewModelLoader(appConfig.ModelPath),
+ }
+
+ var err error
+
+ app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+
+ app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath)
+ app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+ app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
+
+ app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
+ if err != nil {
+ log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service")
+ }
+
+ return app
}
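
For callers, the signature change means Startup now returns a single *core.Application instead of the old three-value tuple. A hedged sketch of the new call site (the options shown are illustrative):

// Sketch: minimal startup with the new return type.
app, err := startup.Startup(
	config.WithModelPath("/models"), // illustrative option
	config.WithContext(ctx),
)
if err != nil {
	log.Fatal().Err(err).Msg("startup failed")
}
// All services now hang off the Application struct, e.g.:
_ = app.OpenAIService
_ = app.ListModelsService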
diff --git a/core/state.go b/core/state.go
new file mode 100644
index 00000000..cf0d614b
--- /dev/null
+++ b/core/state.go
@@ -0,0 +1,41 @@
+package core
+
+import (
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/pkg/model"
+)
+
+// TODO: Can I come up with a better name or location for this?
+// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
+// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
+type Application struct {
+
+ // Application-Level Config
+ ApplicationConfig *config.ApplicationConfig
+ // ApplicationState *ApplicationState
+
+ // Core Low-Level Services
+ BackendConfigLoader *config.BackendConfigLoader
+ ModelLoader *model.ModelLoader
+
+ // Backend Services
+ EmbeddingsBackendService *backend.EmbeddingsBackendService
+ ImageGenerationBackendService *backend.ImageGenerationBackendService
+ LLMBackendService *backend.LLMBackendService
+ TranscriptionBackendService *backend.TranscriptionBackendService
+ TextToSpeechBackendService *backend.TextToSpeechBackendService
+
+ // LocalAI System Services
+ BackendMonitorService *services.BackendMonitorService
+ GalleryService *services.GalleryService
+ ListModelsService *services.ListModelsService
+ LocalAIMetricsService *services.LocalAIMetricsService
+ OpenAIService *services.OpenAIService
+}
+
+// TODO [NEXT PR?]: Break up ApplicationConfig.
+// Migrate over stuff that is not set via config at all - especially runtime stuff
+type ApplicationState struct {
+}
diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
new file mode 100644
index 00000000..c33bafe1
--- /dev/null
+++ b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
@@ -0,0 +1,25 @@
+meta {
+ name: -completions Stream
+ type: http
+ seq: 4
+}
+
+post {
+ url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
+ body: json
+ auth: none
+}
+
+headers {
+ Content-Type: application/json
+}
+
+body:json {
+ {
+ "model": "{{DEFAULT_MODEL}}",
+ "prompt": "function downloadFile(string url, string outputPath) {",
+ "max_tokens": 256,
+ "temperature": 0.5,
+ "stream": true
+ }
+}
diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go
new file mode 100644
index 00000000..324e8cc5
--- /dev/null
+++ b/pkg/concurrency/concurrency.go
@@ -0,0 +1,135 @@
+package concurrency
+
+import (
+ "sync"
+)
+
+// TODO: closeWhenDone bool parameter:
+// It is currently experimental, which is why it exists at all.
+// Is there ever a situation to use false?
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type.
+// mappingFn allows the caller to convert from the input type to the output type
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup {
+ var wg sync.WaitGroup
+ wg.Add(len(individualResultChannels))
+ mergingFn := func(c <-chan IndividualResultType) {
+ for r := range c {
+ mr, err := mappingFn(r)
+ if err == nil {
+ outputChannel <- mr
+ }
+ }
+ wg.Done()
+ }
+ for _, irc := range individualResultChannels {
+ go mergingFn(irc)
+ }
+ if closeWhenDone {
+ go func() {
+ wg.Wait()
+ close(outputChannel)
+ }()
+ }
+
+ return &wg
+}
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE.
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup {
+ return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone)
+}
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single success result channel of a second type, and an error channel
+// mappingFn allows the caller to convert from the input type to the output type
+// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures.
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup {
+ var wg sync.WaitGroup
+ wg.Add(len(individualResultChannels))
+ mergingFn := func(c <-chan ErrorOr[IndividualResultType]) {
+ for r := range c {
+ if r.Error != nil {
+ errorChannel <- r.Error
+ } else {
+ mv, err := mappingFn(r.Value)
+ if err != nil {
+ errorChannel <- err
+ } else {
+ successChannel <- mv
+ }
+ }
+ }
+ wg.Done()
+ }
+ for _, irc := range individualResultChannels {
+ go mergingFn(irc)
+ }
+ if closeWhenDone {
+ go func() {
+ wg.Wait()
+ close(successChannel)
+ close(errorChannel)
+ }()
+ }
+ return &wg
+}
+
+// This function is used to reduce the results of a slice of channels of a specific result type down to a single result value of a second type.
+// reducerFn allows the caller to convert from the input type to the output type
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType,
+ reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) {
+ wg = &sync.WaitGroup{}
+ wg.Add(len(individualResultsChannels))
+ reduceLock := sync.Mutex{}
+ reducingFn := func(c <-chan InputResultType) {
+ for iv := range c {
+ reduceLock.Lock()
+ initialValue = reducerFn(iv, initialValue)
+ reduceLock.Unlock()
+ }
+ wg.Done()
+ }
+ for _, irc := range individualResultsChannels {
+ go reducingFn(irc)
+ }
+ go func() {
+ wg.Wait()
+ outputChannel <- initialValue
+ if closeWhenDone {
+ close(outputChannel)
+ }
+ }()
+ return wg
+}
+
+// This function is primarily designed to be used in combination with the above utility functions.
+// A slice of input result channels of a specific type is provided, along with a function to map those values to another type
+// A slice of output result channels is returned, where each value is mapped as it comes in.
+// The order of the slice will be retained.
+func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChannels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) {
+ rawOutputChannels := make([]<-chan OutputResultType, len(inputChannels))
+
+ transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) {
+ for iv := range ic {
+ oc <- mappingFn(iv)
+ }
+ close(oc)
+ }
+
+ for ci, c := range inputChannels {
+ roc := make(chan OutputResultType)
+ go transformingFn(c, roc)
+ rawOutputChannels[ci] = roc
+ }
+
+ outputChannels = rawOutputChannels
+ return
+}
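
The tests below exercise the reducer, the unmapped merger, and the transformer, but not SliceOfChannelsMergerWithErrors, so a small sketch of its intended shape follows. The worker channels are hypothetical:

// Sketch: split ErrorOr results into a success channel and an error channel.
var workerA, workerB <-chan concurrency.ErrorOr[int] // hypothetical producers
results := []<-chan concurrency.ErrorOr[int]{workerA, workerB}
successes := make(chan string)
errs := make(chan error)
concurrency.SliceOfChannelsMergerWithErrors(results, successes, errs,
	func(v int) (string, error) { return fmt.Sprintf("got %d", v), nil },
	true) // close both outputs once every input channel has closed
go func() {
	for s := range successes {
		fmt.Println(s)
	}
}()
for e := range errs {
	log.Error().Err(e).Msg("worker failed")
}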
diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go
new file mode 100644
index 00000000..fedd74be
--- /dev/null
+++ b/pkg/concurrency/concurrency_test.go
@@ -0,0 +1,101 @@
+package concurrency_test
+
+// TODO: normally, these go in utils_tests, right? Why does this cause problems only in pkg/utils?
+
+import (
+ "fmt"
+ "slices"
+
+ . "github.com/go-skynet/LocalAI/pkg/concurrency"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("utils/concurrency tests", func() {
+ It("SliceOfChannelsReducer works", func() {
+ individualResultsChannels := []<-chan int{}
+ initialValue := 0
+ for i := 0; i < 3; i++ {
+ c := make(chan int)
+ go func(i int, c chan int) {
+ for ii := 1; ii < 4; ii++ {
+ c <- (i * ii)
+ }
+ close(c)
+ }(i, c)
+ individualResultsChannels = append(individualResultsChannels, c)
+ }
+ Expect(len(individualResultsChannels)).To(Equal(3))
+ finalResultChannel := make(chan int)
+ wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int {
+ return val + input
+ }, initialValue, true)
+
+ Expect(wg).ToNot(BeNil())
+
+ result := <-finalResultChannel
+
+ Expect(result).ToNot(Equal(0))
+ Expect(result).To(Equal(18))
+ })
+
+ It("SliceOfChannelsRawMergerWithoutMapping works", func() {
+ individualResultsChannels := []<-chan int{}
+ for i := 0; i < 3; i++ {
+ c := make(chan int)
+ go func(i int, c chan int) {
+ for ii := 1; ii < 4; ii++ {
+ c <- (i * ii)
+ }
+ close(c)
+ }(i, c)
+ individualResultsChannels = append(individualResultsChannels, c)
+ }
+ Expect(len(individualResultsChannels)).To(Equal(3))
+ outputChannel := make(chan int)
+ wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true)
+ Expect(wg).ToNot(BeNil())
+ outputSlice := []int{}
+ for v := range outputChannel {
+ outputSlice = append(outputSlice, v)
+ }
+ Expect(len(outputSlice)).To(Equal(9))
+ slices.Sort(outputSlice)
+ Expect(outputSlice[0]).To(BeZero())
+ Expect(outputSlice[3]).To(Equal(1))
+ Expect(outputSlice[8]).To(Equal(6))
+ })
+
+ It("SliceOfChannelsTransformer works", func() {
+ individualResultsChannels := []<-chan int{}
+ for i := 0; i < 3; i++ {
+ c := make(chan int)
+ go func(i int, c chan int) {
+ for ii := 1; ii < 4; ii++ {
+ c <- (i * ii)
+ }
+ close(c)
+ }(i, c)
+ individualResultsChannels = append(individualResultsChannels, c)
+ }
+ Expect(len(individualResultsChannels)).To(Equal(3))
+ mappingFn := func(i int) string {
+ return fmt.Sprintf("$%d", i)
+ }
+
+ outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn)
+ Expect(len(outputChannels)).To(Equal(3))
+ rSlice := []string{}
+ for ii := 1; ii < 4; ii++ {
+ for i := 0; i < 3; i++ {
+ res := <-outputChannels[i]
+ rSlice = append(rSlice, res)
+ }
+ }
+ slices.Sort(rSlice)
+ Expect(rSlice[0]).To(Equal("$0"))
+ Expect(rSlice[3]).To(Equal("$1"))
+ Expect(rSlice[8]).To(Equal("$6"))
+ })
+})
diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go
new file mode 100644
index 00000000..76081ba3
--- /dev/null
+++ b/pkg/concurrency/types.go
@@ -0,0 +1,6 @@
+package concurrency
+
+type ErrorOr[T any] struct {
+ Value T
+ Error error
+}
diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go
index 8fb8c39d..49a6b1bd 100644
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -41,7 +41,7 @@ type Backend interface {
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
- AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
+ AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error)
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)
diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go
index 0af5d94f..c0b4bc34 100644
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
return fmt.Errorf("unimplemented")
}
-func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) {
- return schema.Result{}, fmt.Errorf("unimplemented")
+func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) {
+ return schema.TranscriptionResult{}, fmt.Errorf("unimplemented")
}
func (llm *Base) TTS(*pb.TTSRequest) error {
diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go
index 882db12a..0e0e56c7 100644
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
return client.TTS(ctx, in, opts...)
}
-func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
+func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
@@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
if err != nil {
return nil, err
}
- tresult := &schema.Result{}
+ tresult := &schema.TranscriptionResult{}
for _, s := range res.Segments {
tks := []int{}
for _, t := range s.Tokens {
diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go
index 73b185a3..b4ba4884 100644
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
return e.s.TTS(ctx, in)
}
-func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
+func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
r, err := e.s.AudioTranscription(ctx, in)
if err != nil {
return nil, err
}
- tr := &schema.Result{}
+ tr := &schema.TranscriptionResult{}
for _, s := range r.Segments {
var tks []int
for _, t := range s.Tokens {
diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go
index 4d06544d..aa7a3fbc 100644
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -15,7 +15,7 @@ type LLM interface {
Load(*pb.ModelOptions) error
Embeddings(*pb.PredictOptions) ([]float32, error)
GenerateImage(*pb.GenerateImageRequest) error
- AudioTranscription(*pb.TranscriptRequest) (schema.Result, error)
+ AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
TTS(*pb.TTSRequest) error
TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
Status() (pb.StatusResponse, error)
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 5d9808a4..617d8f62 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
if _, err := os.Stat(uri); err == nil {
serverAddress, err := getFreeAddress()
if err != nil {
- return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
+ return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error())
}
// Make sure the process is executable
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
@@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
if !ready {
log.Debug().Msgf("GRPC Service NOT ready")
- return "", fmt.Errorf("grpc service not ready")
+ return "", fmt.Errorf("%s grpc service not ready", backend)
}
options := *o.gRPCOptions
@@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
if err != nil {
- return "", fmt.Errorf("could not load model: %w", err)
+ return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err)
}
if !res.Success {
- return "", fmt.Errorf("could not load model (no success): %s", res.Message)
+ return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message)
}
return client, nil
diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go
deleted file mode 100644
index b09516a7..00000000
--- a/pkg/startup/model_preload.go
+++ /dev/null
@@ -1,85 +0,0 @@
-package startup
-
-import (
- "errors"
- "os"
- "path/filepath"
-
- "github.com/go-skynet/LocalAI/embedded"
- "github.com/go-skynet/LocalAI/pkg/downloader"
- "github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/rs/zerolog/log"
-)
-
-// PreloadModelsConfigurations will preload models from the given list of URLs
-// It will download the model if it is not already present in the model path
-// It will also try to resolve if the model is an embedded model YAML configuration
-func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) {
- for _, url := range models {
-
- // As a best effort, try to resolve the model from the remote library
- // if it's not resolved we try with the other method below
- if modelLibraryURL != "" {
- lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
- if err == nil {
- if lib[url] != "" {
- log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
- url = lib[url]
- }
- }
- }
-
- url = embedded.ModelShortURL(url)
- switch {
- case embedded.ExistsInModelsLibrary(url):
- modelYAML, err := embedded.ResolveContent(url)
- // If we resolve something, just save it to disk and continue
- if err != nil {
- log.Error().Err(err).Msg("error resolving model content")
- continue
- }
-
- log.Debug().Msgf("[startup] resolved embedded model: %s", url)
- md5Name := utils.MD5(url)
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
- log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
- }
- case downloader.LooksLikeURL(url):
- log.Debug().Msgf("[startup] resolved model to download: %s", url)
-
- // md5 of model name
- md5Name := utils.MD5(url)
-
- // check if file exists
- if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
- utils.DisplayDownloadFunction(fileName, current, total, percent)
- })
- if err != nil {
- log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
- }
- }
- default:
- if _, err := os.Stat(url); err == nil {
- log.Debug().Msgf("[startup] resolved local model: %s", url)
- // copy to modelPath
- md5Name := utils.MD5(url)
-
- modelYAML, err := os.ReadFile(url)
- if err != nil {
- log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
- continue
- }
-
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
- log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
- }
- } else {
- log.Warn().Msgf("[startup] failed resolving model '%s'", url)
- }
- }
- }
-}
diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go
new file mode 100644
index 00000000..769d8a88
--- /dev/null
+++ b/pkg/utils/base64.go
@@ -0,0 +1,50 @@
+package utils
+
+import (
+ "encoding/base64"
+ "fmt"
+ "io"
+ "net/http"
+ "strings"
+ "time"
+)
+
+var base64DownloadClient http.Client = http.Client{
+ Timeout: 30 * time.Second,
+}
+
+// GetImageURLAsBase64 checks whether the string is a URL; if so, it downloads
+// the image into memory, encodes it in base64 and returns the base64 string.
+//
+// This may look odd living down in pkg/utils while it is currently only used
+// in core/config, but I believe it may be useful for MQTT as well in the near
+// future, so I'm extracting it while I'm thinking of it.
+func GetImageURLAsBase64(s string) (string, error) {
+ if strings.HasPrefix(s, "http") {
+ // download the image
+ resp, err := base64DownloadClient.Get(s)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ // read the image data into memory
+ data, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return "", err
+ }
+
+ // encode the image data in base64
+ encoded := base64.StdEncoding.EncodeToString(data)
+
+ // return the base64 string
+ return encoded, nil
+ }
+
+ // if the string instead is prefixed with "data:image/jpeg;base64,", drop it
+ if strings.HasPrefix(s, "data:image/jpeg;base64,") {
+ return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
+ }
+ return "", fmt.Errorf("not valid string")
+}
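A hedged usage sketch for the helper above. The import path follows the repository layout visible elsewhere in this series; the URL is a placeholder. Note that data URIs other than data:image/jpeg fall through to the error case, so callers should be prepared for that.

package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/utils"
)

func main() {
	// Remote URLs are downloaded (30s client timeout) and re-encoded to
	// base64; strings already carrying a "data:image/jpeg;base64," prefix
	// just have that prefix stripped.
	b64, err := utils.GetImageURLAsBase64("https://example.com/cat.jpg")
	if err != nil {
		fmt.Println("could not resolve image:", err)
		return
	}
	fmt.Println("base64 payload length:", len(b64))
}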
From f1f39eea3fd915e8ccc29ad8fa9d20c003ef8ed3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 09:47:33 +0200
Subject: [PATCH 0147/2750] Create localaibot_automerge.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/localaibot_automerge.yml | 42 ++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 .github/workflows/localaibot_automerge.yml
diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml
new file mode 100644
index 00000000..a540997b
--- /dev/null
+++ b/.github/workflows/localaibot_automerge.yml
@@ -0,0 +1,42 @@
+name: Dependabot auto-merge
+on:
+- pull_request_target
+
+permissions:
+ contents: write
+ pull-requests: write
+ packages: read
+
+jobs:
+ dependabot:
+ runs-on: ubuntu-latest
+ if: ${{ github.actor == 'localai-bot' }}
+ steps:
+ - name: Dependabot metadata
+ id: metadata
+ uses: dependabot/fetch-metadata@v1.3.4
+ with:
+ github-token: "${{ secrets.GITHUB_TOKEN }}"
+ skip-commit-verification: true
+
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ - name: Approve a PR if not already approved
+ run: |
+ gh pr checkout "$PR_URL"
+ if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
+ then
+ gh pr review --approve "$PR_URL"
+ else
+ echo "PR already approved.";
+ fi
+ env:
+ PR_URL: ${{github.event.pull_request.html_url}}
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+
+ - name: Enable auto-merge for LocalAIBot PRs
+ run: gh pr merge --auto --squash "$PR_URL"
+ env:
+ PR_URL: ${{github.event.pull_request.html_url}}
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
From 95244ed6e7598db09fa8974052f550bb1dcc9d8e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 10:03:15 +0200
Subject: [PATCH 0148/2750] Update localaibot_automerge.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/localaibot_automerge.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml
index a540997b..98629ab7 100644
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -1,4 +1,4 @@
-name: Dependabot auto-merge
+name: LocalAI-bot auto-merge
on:
- pull_request_target
From 4e74560649b0cb54fd1ab03d3a7a4105e2dd01fd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 13:30:40 +0200
Subject: [PATCH 0149/2750] ci: fix release pipeline missing dependencies
(#2025)
---
.github/workflows/release.yaml | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 3c1cea44..33c640cc 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,6 +1,8 @@
name: Build and Release
-on: push
+on:
+- push
+- pull_request
env:
GRPC_VERSION: v1.58.0
@@ -40,7 +42,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install build-essential ffmpeg
+ sudo apt-get install build-essential ffmpeg protobuf-compiler
- name: Install CUDA Dependencies
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
run: |
@@ -75,6 +77,9 @@ jobs:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+ export PATH=$PATH:$GOPATH/bin
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
export BUILD_TYPE=cublas
export PATH=/usr/local/cuda/bin:$PATH
@@ -106,9 +111,12 @@ jobs:
cache: false
- name: Dependencies
run: |
- sudo apt-get install -y --no-install-recommends libopencv-dev
+ sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
- name: Build stablediffusion
run: |
+ export PATH=$PATH:$GOPATH/bin
make backend-assets/grpc/stablediffusion
mkdir -p release && cp backend-assets/grpc/stablediffusion release
- uses: actions/upload-artifact@v4
@@ -139,6 +147,8 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
- name: Build
id: build
env:
@@ -147,6 +157,7 @@ jobs:
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
+ export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
@@ -183,6 +194,8 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
- name: Build
id: build
env:
@@ -191,6 +204,7 @@ jobs:
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
+ export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
From b91820b7f88173e532af8de509d43dd6191a2386 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 13:46:07 +0200
Subject: [PATCH 0150/2750] Update localaibot_automerge.yml
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/localaibot_automerge.yml | 7 -------
1 file changed, 7 deletions(-)
diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml
index 98629ab7..74c725f3 100644
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -12,13 +12,6 @@ jobs:
runs-on: ubuntu-latest
if: ${{ github.actor == 'localai-bot' }}
steps:
- - name: Dependabot metadata
- id: metadata
- uses: dependabot/fetch-metadata@v1.3.4
- with:
- github-token: "${{ secrets.GITHUB_TOKEN }}"
- skip-commit-verification: true
-
- name: Checkout repository
uses: actions/checkout@v3
From 619f2517a490a1a3448cf5df837a8229b232287a Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 13 Apr 2024 15:47:39 +0200
Subject: [PATCH 0151/2750] :arrow_up: Update ggerganov/llama.cpp (#2028)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 9f86ef23..1b59c604 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa
+CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 0fdff269241d5ce93f325a48691bf9ebc5b5b9e6 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 18:59:21 +0200
Subject: [PATCH 0152/2750] feat(parler-tts): Add new backend (#2027)
* feat(parler-tts): Add new backend
Signed-off-by: Ettore Di Giacinto
* feat(parler-tts): try downgrade protobuf
Signed-off-by: Ettore Di Giacinto
* feat(parler-tts): add parler conda env
Signed-off-by: Ettore Di Giacinto
* Revert "feat(parler-tts): try downgrade protobuf"
This reverts commit bd5941d5cfc00676b45a99f71debf3c34249cf3c.
Signed-off-by: Ettore Di Giacinto
* deps: add grpc
Signed-off-by: Ettore Di Giacinto
* fix: try to gen proto with same environment
* workaround
* Revert "fix: try to gen proto with same environment"
This reverts commit 998c745e2f475ec3ec43ac017bcebf3a7ce15b8b.
* Workaround fixup
---------
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Dave
---
.github/workflows/test-extra.yml | 29 ++++
Dockerfile | 5 +-
Makefile | 13 +-
backend/python/parler-tts/Makefile | 39 ++++++
backend/python/parler-tts/install.sh | 39 ++++++
backend/python/parler-tts/parler-nvidia.yml | 48 +++++++
backend/python/parler-tts/parler.yml | 36 +++++
.../python/parler-tts/parler_tts_server.py | 125 ++++++++++++++++++
backend/python/parler-tts/run.sh | 16 +++
backend/python/parler-tts/test.sh | 11 ++
backend/python/parler-tts/test_parler.py | 81 ++++++++++++
backend/python/transformers-musicgen/run.sh | 2 +-
12 files changed, 440 insertions(+), 4 deletions(-)
create mode 100644 backend/python/parler-tts/Makefile
create mode 100755 backend/python/parler-tts/install.sh
create mode 100644 backend/python/parler-tts/parler-nvidia.yml
create mode 100644 backend/python/parler-tts/parler.yml
create mode 100644 backend/python/parler-tts/parler_tts_server.py
create mode 100644 backend/python/parler-tts/run.sh
create mode 100644 backend/python/parler-tts/test.sh
create mode 100644 backend/python/parler-tts/test_parler.py
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 7705783e..fa45cb3c 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -104,6 +104,35 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
+ tests-parler-tts:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential ffmpeg
+ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+ sudo apt-get update && \
+ sudo apt-get install -y conda
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools
+
+ sudo rm -rfv /usr/bin/conda || true
+
+ - name: Test parler-tts
+ run: |
+ export PATH=$PATH:/opt/conda/bin
+ make --jobs=5 --output-sync=target -C backend/python/parler-tts
+ make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-transformers-musicgen:
runs-on: ubuntu-latest
diff --git a/Dockerfile b/Dockerfile
index d0217d50..397fbe22 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ARG GO_TAGS="stablediffusion tinydream tts"
@@ -275,6 +275,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers-musicgen \
; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+ make -C backend/python/parler-tts \
+ ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/coqui \
; fi
diff --git a/Makefile b/Makefile
index 1b59c604..d5bc3739 100644
--- a/Makefile
+++ b/Makefile
@@ -439,10 +439,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -524,6 +524,14 @@ transformers-protogen:
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
+.PHONY: parler-tts-protogen
+parler-tts-protogen:
+ $(MAKE) -C backend/python/parler-tts protogen
+
+.PHONY: parler-tts-protogen-clean
+parler-tts-protogen-clean:
+ $(MAKE) -C backend/python/parler-tts protogen-clean
+
.PHONY: transformers-musicgen-protogen
transformers-musicgen-protogen:
$(MAKE) -C backend/python/transformers-musicgen protogen
@@ -560,6 +568,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
+ $(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile
new file mode 100644
index 00000000..4497762e
--- /dev/null
+++ b/backend/python/parler-tts/Makefile
@@ -0,0 +1,39 @@
+export CONDA_ENV_PATH = "parler.yml"
+SKIP_CONDA?=0
+ifeq ($(BUILD_TYPE), cublas)
+export CONDA_ENV_PATH = "parler-nvidia.yml"
+endif
+
+# Intel GPUs are supposed to have dependencies installed in the main python
+# environment, so we skip conda installation for SYCL builds.
+# https://github.com/intel/intel-extension-for-pytorch/issues/538
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
+.PHONY: parler-tts
+parler-tts: protogen
+ @echo "Installing $(CONDA_ENV_PATH)..."
+ bash install.sh $(CONDA_ENV_PATH)
+
+.PHONY: run
+run: protogen
+ @echo "Running transformers..."
+ bash run.sh
+ @echo "transformers run."
+
+.PHONY: test
+test: protogen
+ @echo "Testing transformers..."
+ bash test.sh
+ @echo "transformers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh
new file mode 100755
index 00000000..b9965b23
--- /dev/null
+++ b/backend/python/parler-tts/install.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -ex
+
+SKIP_CONDA=${SKIP_CONDA:-0}
+
+# Returns success when the named conda environment does NOT yet exist
+# (note the negation), so the caller creates it on success.
+conda_env_exists(){
+ ! conda list --name "${@}" >/dev/null 2>/dev/null
+}
+
+if [ $SKIP_CONDA -eq 1 ]; then
+ echo "Skipping conda environment installation"
+else
+ export PATH=$PATH:/opt/conda/bin
+ if conda_env_exists "parler" ; then
+ echo "Creating virtual environment..."
+ conda env create --name parler --file $1
+ echo "Virtual environment created."
+ else
+ echo "Virtual environment already exists."
+ fi
+fi
+
+if [ $SKIP_CONDA -ne 1 ]; then
+ # Activate conda environment
+ source activate parler
+ # https://github.com/descriptinc/audiotools/issues/101
+ # incompatible protobuf versions.
+ curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o $CONDA_PREFIX/lib/python3.11/site-packages/google/protobuf/internal/builder.py
+fi
+
+if [ "$PIP_CACHE_PURGE" = true ] ; then
+ if [ $SKIP_CONDA -ne 1 ]; then
+ # Activate conda environment
+ source activate parler
+ fi
+
+ pip cache purge
+fi
\ No newline at end of file
diff --git a/backend/python/parler-tts/parler-nvidia.yml b/backend/python/parler-tts/parler-nvidia.yml
new file mode 100644
index 00000000..ed925e94
--- /dev/null
+++ b/backend/python/parler-tts/parler-nvidia.yml
@@ -0,0 +1,48 @@
+name: parler
+channels:
+ - defaults
+dependencies:
+ - _libgcc_mutex=0.1=main
+ - _openmp_mutex=5.1=1_gnu
+ - bzip2=1.0.8=h7b6447c_0
+ - ca-certificates=2023.08.22=h06a4308_0
+ - ld_impl_linux-64=2.38=h1181459_1
+ - libffi=3.4.4=h6a678d5_0
+ - libgcc-ng=11.2.0=h1234567_1
+ - libgomp=11.2.0=h1234567_1
+ - libstdcxx-ng=11.2.0=h1234567_1
+ - libuuid=1.41.5=h5eee18b_0
+ - ncurses=6.4=h6a678d5_0
+ - openssl=3.0.11=h7f8727e_2
+ - pip=23.2.1=py311h06a4308_0
+ - python=3.11.5=h955ad1f_0
+ - readline=8.2=h5eee18b_0
+ - setuptools=68.0.0=py311h06a4308_0
+ - sqlite=3.41.2=h5eee18b_0
+ - tk=8.6.12=h1ccaba5_0
+ - tzdata=2023c=h04d1e81_0
+ - wheel=0.41.2=py311h06a4308_0
+ - xz=5.4.2=h5eee18b_0
+ - zlib=1.2.13=h5eee18b_0
+ - pip:
+ - accelerate>=0.11.0
+ - grpcio==1.59.0
+ - numpy==1.26.0
+ - nvidia-cublas-cu12==12.1.3.1
+ - nvidia-cuda-cupti-cu12==12.1.105
+ - nvidia-cuda-nvrtc-cu12==12.1.105
+ - nvidia-cuda-runtime-cu12==12.1.105
+ - nvidia-cudnn-cu12==8.9.2.26
+ - nvidia-cufft-cu12==11.0.2.54
+ - nvidia-curand-cu12==10.3.2.106
+ - nvidia-cusolver-cu12==11.4.5.107
+ - nvidia-cusparse-cu12==12.1.0.106
+ - nvidia-nccl-cu12==2.18.1
+ - nvidia-nvjitlink-cu12==12.2.140
+ - nvidia-nvtx-cu12==12.1.105
+ - torch==2.1.0
+ - transformers>=4.34.0
+ - descript-audio-codec
+ - sentencepiece
+ - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
+prefix: /opt/conda/envs/parler
diff --git a/backend/python/parler-tts/parler.yml b/backend/python/parler-tts/parler.yml
new file mode 100644
index 00000000..fd0c3cb6
--- /dev/null
+++ b/backend/python/parler-tts/parler.yml
@@ -0,0 +1,36 @@
+name: parler
+channels:
+ - defaults
+dependencies:
+ - _libgcc_mutex=0.1=main
+ - _openmp_mutex=5.1=1_gnu
+ - bzip2=1.0.8=h7b6447c_0
+ - ca-certificates=2023.08.22=h06a4308_0
+ - ld_impl_linux-64=2.38=h1181459_1
+ - libffi=3.4.4=h6a678d5_0
+ - libgcc-ng=11.2.0=h1234567_1
+ - libgomp=11.2.0=h1234567_1
+ - libstdcxx-ng=11.2.0=h1234567_1
+ - libuuid=1.41.5=h5eee18b_0
+ - ncurses=6.4=h6a678d5_0
+ - openssl=3.0.11=h7f8727e_2
+ - pip=23.2.1=py311h06a4308_0
+ - python=3.11.5=h955ad1f_0
+ - readline=8.2=h5eee18b_0
+ - setuptools=68.0.0=py311h06a4308_0
+ - sqlite=3.41.2=h5eee18b_0
+ - tk=8.6.12=h1ccaba5_0
+ - tzdata=2023c=h04d1e81_0
+ - wheel=0.41.2=py311h06a4308_0
+ - xz=5.4.2=h5eee18b_0
+ - zlib=1.2.13=h5eee18b_0
+ - pip:
+ - accelerate>=0.11.0
+ - numpy==1.26.0
+ - grpcio==1.59.0
+ - torch==2.1.0
+ - transformers>=4.34.0
+ - descript-audio-codec
+ - sentencepiece
+ - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
+prefix: /opt/conda/envs/parler
diff --git a/backend/python/parler-tts/parler_tts_server.py b/backend/python/parler-tts/parler_tts_server.py
new file mode 100644
index 00000000..655990d7
--- /dev/null
+++ b/backend/python/parler-tts/parler_tts_server.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for ParlerTTSForConditionalGeneration models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from scipy.io.wavfile import write as write_wav
+
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf
+import torch
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS is specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+ """
+ A gRPC servicer for the backend service.
+
+ This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
+ """
+ def Health(self, request, context):
+ """
+ A gRPC method that returns the health status of the backend service.
+
+ Args:
+ request: A HealthRequest object that contains the request parameters.
+ context: A grpc.ServicerContext object that provides information about the RPC.
+
+ Returns:
+ A Reply object that contains the health status of the backend service.
+ """
+ return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+ def LoadModel(self, request, context):
+ """
+ A gRPC method that loads a model into memory.
+
+ Args:
+ request: A LoadModelRequest object that contains the request parameters.
+ context: A grpc.ServicerContext object that provides information about the RPC.
+
+ Returns:
+ A Result object that contains the result of the LoadModel operation.
+ """
+ model_name = request.Model
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ try:
+ self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+ except Exception as err:
+ return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+ return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+ def TTS(self, request, context):
+ model_name = request.model
+ voice = request.voice
+ if voice == "":
+ voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+ if model_name == "":
+ return backend_pb2.Result(success=False, message="request.model is required")
+ try:
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
+ prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)
+
+ generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+ audio_arr = generation.cpu().numpy().squeeze()
+ print("[parler-tts] TTS generated!", file=sys.stderr)
+ sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
+ print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
+ print("[parler-tts] TTS for", file=sys.stderr)
+ print(request, file=sys.stderr)
+ except Exception as err:
+ return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+ return backend_pb2.Result(success=True)
+
+
+def serve(address):
+ server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+ backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+ server.add_insecure_port(address)
+ server.start()
+ print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)
+
+ # Define the signal handler function
+ def signal_handler(sig, frame):
+ print("[parler-tts] Received termination signal. Shutting down...")
+ server.stop(0)
+ sys.exit(0)
+
+ # Set the signal handlers for SIGINT and SIGTERM
+ signal.signal(signal.SIGINT, signal_handler)
+ signal.signal(signal.SIGTERM, signal_handler)
+
+ try:
+ while True:
+ time.sleep(_ONE_DAY_IN_SECONDS)
+ except KeyboardInterrupt:
+ server.stop(0)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Run the gRPC server.")
+ parser.add_argument(
+ "--addr", default="localhost:50051", help="The address to bind the server to."
+ )
+ args = parser.parse_args()
+ print(f"[parler-tts] startup: {args}", file=sys.stderr)
+ serve(args.addr)
diff --git a/backend/python/parler-tts/run.sh b/backend/python/parler-tts/run.sh
new file mode 100644
index 00000000..08e42198
--- /dev/null
+++ b/backend/python/parler-tts/run.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+##
+## A bash script wrapper that runs the parler-tts server with conda
+
+echo "Launching gRPC server for parler-tts"
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate parler
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/parler_tts_server.py $@
diff --git a/backend/python/parler-tts/test.sh b/backend/python/parler-tts/test.sh
new file mode 100644
index 00000000..1bd15fd1
--- /dev/null
+++ b/backend/python/parler-tts/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+##
+## A bash script wrapper that runs the parler-tts tests with conda
+
+# Activate conda environment
+source activate parler
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python -m unittest $DIR/test_parler.py
\ No newline at end of file
diff --git a/backend/python/parler-tts/test_parler.py b/backend/python/parler-tts/test_parler.py
new file mode 100644
index 00000000..ce9b66ac
--- /dev/null
+++ b/backend/python/parler-tts/test_parler.py
@@ -0,0 +1,81 @@
+"""
+A test script to test the gRPC service
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+ """
+ TestBackendServicer is the class that tests the gRPC service
+ """
+ def setUp(self):
+ """
+ This method sets up the gRPC service by starting the server
+ """
+ self.service = subprocess.Popen(["python3", "parler_tts_server.py", "--addr", "localhost:50051"])
+ time.sleep(10)
+
+ def tearDown(self) -> None:
+ """
+ This method tears down the gRPC service by terminating the server
+ """
+ self.service.terminate()
+ self.service.wait()
+
+ def test_server_startup(self):
+ """
+ This method tests if the server starts up successfully
+ """
+ try:
+ self.setUp()
+ with grpc.insecure_channel("localhost:50051") as channel:
+ stub = backend_pb2_grpc.BackendStub(channel)
+ response = stub.Health(backend_pb2.HealthMessage())
+ self.assertEqual(response.message, b'OK')
+ except Exception as err:
+ print(err)
+ self.fail("Server failed to start")
+ finally:
+ self.tearDown()
+
+ def test_load_model(self):
+ """
+ This method tests if the model is loaded successfully
+ """
+ try:
+ self.setUp()
+ with grpc.insecure_channel("localhost:50051") as channel:
+ stub = backend_pb2_grpc.BackendStub(channel)
+ response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
+ self.assertTrue(response.success)
+ self.assertEqual(response.message, "Model loaded successfully")
+ except Exception as err:
+ print(err)
+ self.fail("LoadModel service failed")
+ finally:
+ self.tearDown()
+
+ def test_tts(self):
+ """
+ This method tests if the TTS audio is generated successfully
+ """
+ try:
+ self.setUp()
+ with grpc.insecure_channel("localhost:50051") as channel:
+ stub = backend_pb2_grpc.BackendStub(channel)
+ response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
+ self.assertTrue(response.success)
+ tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
+ tts_response = stub.TTS(tts_request)
+ self.assertIsNotNone(tts_response)
+ except Exception as err:
+ print(err)
+ self.fail("TTS service failed")
+ finally:
+ self.tearDown()
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh
index d3dcb968..3d3ffcfd 100644
--- a/backend/python/transformers-musicgen/run.sh
+++ b/backend/python/transformers-musicgen/run.sh
@@ -8,7 +8,7 @@ echo "Launching gRPC server for transformers-musicgen"
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
-source activate transformers-musicgen
+source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
From d5699dbf4f3041eb7f01c996c0a2b3b59319b1bc Mon Sep 17 00:00:00 2001
From: Dave
Date: Sat, 13 Apr 2024 13:01:17 -0400
Subject: [PATCH 0153/2750] fix - correct checkout versions (#2029)
Minor fix: bump some checkout@v3 steps to checkout@v4 to match the rest of the workflows and clean up warnings.
Signed-off-by: Dave Lee
---
.github/workflows/dependabot_auto.yml | 2 +-
.github/workflows/localaibot_automerge.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
index 51337d20..be3a40a8 100644
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -20,7 +20,7 @@ jobs:
skip-commit-verification: true
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml
index 74c725f3..6a1ab891 100644
--- a/.github/workflows/localaibot_automerge.yml
+++ b/.github/workflows/localaibot_automerge.yml
@@ -13,7 +13,7 @@ jobs:
if: ${{ github.actor == 'localai-bot' }}
steps:
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
From 6b07ded11909bf8f52a8e6de402ac8bf91201831 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 19:12:54 +0200
Subject: [PATCH 0154/2750] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 76a5fc08..4c2f68b2 100644
--- a/README.md
+++ b/README.md
@@ -50,17 +50,12 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715
-- Upload file API: https://github.com/mudler/LocalAI/pull/1703
-- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
-- Mamba support: https://github.com/mudler/LocalAI/pull/1589
-- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
-- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
-- Img2vid https://github.com/mudler/LocalAI/pull/1442
Hot topics (looking for contributors):
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
From 4486db912b62e31bffe662b977a31567e62ecbfc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 13 Apr 2024 22:57:46 +0200
Subject: [PATCH 0155/2750] Update quickstart.md
Signed-off-by: Ettore Di Giacinto
---
docs/content/docs/getting-started/quickstart.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index ff1dc6a7..ab45e5aa 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -169,7 +169,7 @@ Call functions
```bash
-curl https://localhost:8080/v1/chat/completions \
+curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4",
From b739cbb86b9734bd62d4f63fad6583cf97059ea5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 14 Apr 2024 10:57:07 +0200
Subject: [PATCH 0156/2750] Revert "build(deps): bump the pip group across 4
directories with 8 updates" (#2030)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Revert "build(deps): bump the pip group across 4 directories with 8 updates (…"
This reverts commit e0dee52a2ab811fccc18f309a6c5fefcb4725448.
---
docs/data/version.json | 2 +-
examples/functions/requirements.txt | 2 +-
examples/langchain-chroma/requirements.txt | 4 ++--
.../langchainpy-localai-example/requirements.txt | 12 ++++++------
examples/streamlit-bot/requirements.txt | 2 +-
5 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 6a618115..1b6a2161 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.12.4"
+ "version": "v2.12.3"
}
diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index 759c5b03..7164e011 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.1.0
+langchain==0.0.234
openai==0.27.8
diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index cdf466b9..b9e649c5 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.1.0
+langchain==0.0.160
openai==0.27.6
chromadb==0.3.21
-llama-index==0.9.36
\ No newline at end of file
+llama-index==0.6.2
\ No newline at end of file
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 1e63b0bf..2de5bcf0 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,16 +1,16 @@
-aiohttp==3.9.2
+aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
-certifi==2023.7.22
+certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
-idna==3.7
-langchain==0.1.0
+idna==3.4
+langchain==0.0.159
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
@@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
-requests==2.31.0
+requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
-urllib3==1.26.18
+urllib3==1.26.15
yarl==1.9.2
diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt
index 1fcd5093..ae527c76 100644
--- a/examples/streamlit-bot/requirements.txt
+++ b/examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.30.0
+streamlit==1.26.0
requests
\ No newline at end of file
From 57bd365d876ae55f950821707485f183a6f6685a Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 15 Apr 2024 01:31:43 +0200
Subject: [PATCH 0157/2750] :arrow_up: Update docs version mudler/LocalAI
(#2032)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 1b6a2161..6a618115 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.12.3"
+ "version": "v2.12.4"
}
From de3a1a0a8e1dcf22aed34cad336962ec53ed89cc Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 15 Apr 2024 01:35:44 +0200
Subject: [PATCH 0158/2750] :arrow_up: Update ggerganov/llama.cpp (#2033)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d5bc3739..04745f39 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a
+CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From e843d7df0e8b177ab122a9f7bfa7196274ccd204 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 15 Apr 2024 19:47:11 +0200
Subject: [PATCH 0159/2750] feat(grpc): return consumed token count and update
response accordingly (#2035)
Fixes: #1920
---
backend/backend.proto | 2 ++
backend/cpp/llama/grpc-server.cpp | 8 ++++++++
core/backend/llm.go | 6 ++++++
core/services/openai.go | 8 ++++----
4 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/backend/backend.proto b/backend/backend.proto
index 56d919ef..62e1a1a6 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -114,6 +114,8 @@ message PredictOptions {
// The response message containing the result
message Reply {
bytes message = 1;
+ int32 tokens = 2;
+ int32 prompt_tokens = 3;
}
message ModelOptions {
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index a2e39a9c..6fb08658 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2332,6 +2332,10 @@ public:
std::string completion_text = result.result_json.value("content", "");
reply.set_message(completion_text);
+ int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+ reply.set_tokens(tokens_predicted);
+ int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+ reply.set_prompt_tokens(tokens_evaluated);
// Send the reply
writer->Write(reply);
@@ -2357,6 +2361,10 @@ public:
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
completion_text = result.result_json.value("content", "");
+ int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+ int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+ reply->set_prompt_tokens(tokens_evaluated);
+ reply->set_tokens(tokens_predicted);
reply->set_message(completion_text);
}
else
diff --git a/core/backend/llm.go b/core/backend/llm.go
index 1878e87a..75766d78 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
} else {
go func() {
reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+ // reply can be nil when Predict returns an error, so only read
+ // token counts on success
+ if err == nil {
+ if tokenUsage.Prompt == 0 {
+ tokenUsage.Prompt = int(reply.PromptTokens)
+ }
+ if tokenUsage.Completion == 0 {
+ tokenUsage.Completion = int(reply.Tokens)
+ }
+ }
if err != nil {
rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
close(rawResultChannel)
diff --git a/core/services/openai.go b/core/services/openai.go
index 0f61d6f4..3fa041f5 100644
--- a/core/services/openai.go
+++ b/core/services/openai.go
@@ -160,7 +160,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest
bc, request, err := oais.getConfig(request)
if err != nil {
- log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+ log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration")
return
}
@@ -259,7 +259,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest
// If any of the setup goroutines experienced an error, quit early here.
if setupError != nil {
go func() {
- log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError)
+ log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup")
rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
close(rawFinalResultChannel)
}()
@@ -603,7 +603,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche
Usage: schema.OpenAIUsage{
PromptTokens: rawResult.Value.Usage.Prompt,
CompletionTokens: rawResult.Value.Usage.Completion,
- TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+ TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
},
}
@@ -644,7 +644,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche
Usage: schema.OpenAIUsage{
PromptTokens: rawResult.Value.Usage.Prompt,
CompletionTokens: rawResult.Value.Usage.Completion,
- TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt,
+ TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
},
}
From c751a4ac06bab3736d464d16cadf02a04f822bb5 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Mon, 15 Apr 2024 14:47:51 -0500
Subject: [PATCH 0160/2750] fix: remove build path from help text documentation
(#2037)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
docs/content/docs/advanced/advanced-usage.md | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index dace5803..4bd16030 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -384,6 +384,8 @@ docker run --env-file .env localai
You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. Any command line parameter can be specified via an environment variable.
+In the help text below, BASEPATH is the location from which local-ai is being executed.
+
#### Global Flags
| Parameter | Default | Description | Environment Variable |
|-----------|---------|-------------|----------------------|
@@ -393,13 +395,13 @@ You can control LocalAI with command line arguments, to specify a binding addres
#### Storage Flags
| Parameter | Default | Description | Environment Variable |
|-----------|---------|-------------|----------------------|
-| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
+| --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH |
| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH |
| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH |
| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH |
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
-| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
+| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE |
#### Models Flags
From 538a086309b91f4594d5513c0fd88e981877a83d Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Mon, 15 Apr 2024 15:13:59 -0500
Subject: [PATCH 0161/2750] fix: previous CLI rework broke debug logging
(#2036)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
Co-authored-by: Dave
---
core/cli/run.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/cli/run.go b/core/cli/run.go
index c3b186c0..cafc0b54 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -60,7 +60,7 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
config.WithModelPath(r.ModelsPath),
config.WithContextSize(r.ContextSize),
- config.WithDebug(ctx.Debug),
+ config.WithDebug(*ctx.LogLevel == "debug"),
config.WithImageDir(r.ImagePath),
config.WithAudioDir(r.AudioPath),
config.WithUploadDir(r.UploadPath),
From b72c6cc9fc6c16db301c2b0d992ba03c348f43b1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 20:52:39 +0000
Subject: [PATCH 0162/2750] build(deps): bump softprops/action-gh-release from
1 to 2 (#2039)
Bumps [softprops/action-gh-release](https://github.com/softprops/action-gh-release) from 1 to 2.
- [Release notes](https://github.com/softprops/action-gh-release/releases)
- [Changelog](https://github.com/softprops/action-gh-release/blob/master/CHANGELOG.md)
- [Commits](https://github.com/softprops/action-gh-release/compare/v1...v2)
---
updated-dependencies:
- dependency-name: softprops/action-gh-release
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/release.yaml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 33c640cc..dc887fc1 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -92,7 +92,7 @@ jobs:
name: LocalAI-linux-${{ matrix.build }}
path: release/
- name: Release
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
@@ -164,7 +164,7 @@ jobs:
name: LocalAI-MacOS-${{ matrix.build }}
path: release/
- name: Release
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
@@ -211,7 +211,7 @@ jobs:
name: LocalAI-MacOS-arm64-${{ matrix.build }}
path: release/
- name: Release
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
From 46609e936e5e644671855b004e89317300d3cfb9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 21:37:06 +0000
Subject: [PATCH 0163/2750] build(deps): bump dependabot/fetch-metadata from
1.3.4 to 2.0.0 (#2040)
Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 1.3.4 to 2.0.0.
- [Release notes](https://github.com/dependabot/fetch-metadata/releases)
- [Commits](https://github.com/dependabot/fetch-metadata/compare/v1.3.4...v2.0.0)
---
updated-dependencies:
- dependency-name: dependabot/fetch-metadata
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/dependabot_auto.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml
index be3a40a8..8e32aee1 100644
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
- uses: dependabot/fetch-metadata@v1.3.4
+ uses: dependabot/fetch-metadata@v2.0.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true
From 320d8a48d9bd09b5fda1c4330d8d693ccc705fcc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 22:02:44 +0000
Subject: [PATCH 0164/2750] build(deps): bump github/codeql-action from 2 to 3
(#2041)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/v2...v3)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
.github/workflows/secscan.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml
index 884b84d5..d9743d9e 100644
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -24,7 +24,7 @@ jobs:
args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file
if: ${{ github.actor != 'dependabot[bot]' }}
- uses: github/codeql-action/upload-sarif@v2
+ uses: github/codeql-action/upload-sarif@v3
with:
# Path to SARIF file relative to the root of the repository
sarif_file: results.sarif
From cdece3879f4658eaccb3394d9e29b9534c8b773b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 16 Apr 2024 00:47:29 +0200
Subject: [PATCH 0165/2750] :arrow_up: Update ggerganov/llama.cpp (#2043)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 04745f39..37130567 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e
+CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 0cc1ad21889d9dca21f71dfe7f47a87a0ddf0012 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 16 Apr 2024 01:27:52 +0200
Subject: [PATCH 0166/2750] :arrow_up: Update ggerganov/whisper.cpp (#2042)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 37130567..7cde8fa7 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c
+WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From fdec8a9d00a034ccd8e075008edd165147edf328 Mon Sep 17 00:00:00 2001
From: Dave
Date: Mon, 15 Apr 2024 21:46:36 -0400
Subject: [PATCH 0167/2750] fix: action-tmate back to upstream, dead code
removal (#2038)
cleanup: upstream action-tmate has merged my PR, so drop the master-branch reference. Also remove dead code from api.go
Signed-off-by: Dave Lee
---
.github/workflows/test.yml | 6 +++---
core/http/api.go | 18 ------------------
2 files changed, 3 insertions(+), 21 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 29bd3e08..156294b5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,7 +121,7 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: dave-gray101/action-tmate@master
+ uses: mxschmitt/action-tmate@v3.18
with:
connect-timeout-seconds: 180
@@ -174,7 +174,7 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: dave-gray101/action-tmate@master
+ uses: mxschmitt/action-tmate@v3.18
with:
connect-timeout-seconds: 180
@@ -209,6 +209,6 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: dave-gray101/action-tmate@master
+ uses: mxschmitt/action-tmate@v3.18
with:
connect-timeout-seconds: 180
\ No newline at end of file
diff --git a/core/http/api.go b/core/http/api.go
index 5c9095ea..7094899a 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -108,24 +108,6 @@ func App(application *core.Application) (*fiber.App, error) {
return c.Next()
}
- // // Check for api_keys.json file
- // fileContent, err := os.ReadFile("api_keys.json")
- // if err == nil {
- // // Parse JSON content from the file
- // var fileKeys []string
- // err := json.Unmarshal(fileContent, &fileKeys)
- // if err != nil {
- // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
- // }
-
- // // Add file keys to options.ApiKeys
- // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...)
- // }
-
- // if len(application.ApplicationConfig.ApiKeys) == 0 {
- // return c.Next()
- // }
-
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
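The surviving middleware logic is easier to see outside the diff. A hedged sketch of its shape using Fiber, with illustrative identifiers (authMiddleware and apiKeys are not the project's names): with no configured keys every request passes through, otherwise a missing Authorization header is rejected.

package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
)

// authMiddleware sketches the check that remains after the dead code
// above was removed.
func authMiddleware(apiKeys []string) fiber.Handler {
	return func(c *fiber.Ctx) error {
		if len(apiKeys) == 0 {
			return c.Next()
		}
		authHeader := c.Get("Authorization")
		if authHeader == "" {
			return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
		}
		return c.Next()
	}
}

func main() {
	app := fiber.New()
	app.Use(authMiddleware([]string{"secret"}))
	log.Fatal(app.Listen(":8080"))
}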
From df4a13a08bf91491652e7baa65bd1eafa2c0ff44 Mon Sep 17 00:00:00 2001
From: Adrien Brault
Date: Tue, 16 Apr 2024 11:10:23 +0200
Subject: [PATCH 0168/2750] docs: fix stores link (#2044)
Signed-off-by: Adrien Brault
---
docs/content/docs/overview.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 6aede1d6..5224bc49 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -100,7 +100,7 @@ Note that this started just as a fun weekend project by [mudler](https://github.
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
-- 💾 [Stores](https://localai.io/features/stores)
+- 💾 [Stores](https://localai.io/stores)
## Contribute and help
From 33c78d2228891caacb9d8bc7dc2c567caaf12a53 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 16 Apr 2024 15:54:14 +0200
Subject: [PATCH 0169/2750] feat(store): add Golang client (#1977)
This adds a basic store client for Go
Signed-off-by: Ettore Di Giacinto
---
core/clients/store.go | 151 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 151 insertions(+)
create mode 100644 core/clients/store.go
diff --git a/core/clients/store.go b/core/clients/store.go
new file mode 100644
index 00000000..f737ee42
--- /dev/null
+++ b/core/clients/store.go
@@ -0,0 +1,151 @@
+package clients
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+)
+
+// Define a struct to hold the store API client
+type StoreClient struct {
+ BaseURL string
+ Client *http.Client
+}
+
+type SetRequest struct {
+ Keys [][]float32 `json:"keys"`
+ Values []string `json:"values"`
+}
+
+type GetRequest struct {
+ Keys [][]float32 `json:"keys"`
+}
+
+type GetResponse struct {
+ Keys [][]float32 `json:"keys"`
+ Values []string `json:"values"`
+}
+
+type DeleteRequest struct {
+ Keys [][]float32 `json:"keys"`
+}
+
+type FindRequest struct {
+ TopK int `json:"topk"`
+ Key []float32 `json:"key"`
+}
+
+type FindResponse struct {
+ Keys [][]float32 `json:"keys"`
+ Values []string `json:"values"`
+ Similarities []float32 `json:"similarities"`
+}
+
+// Constructor for StoreClient
+func NewStoreClient(baseUrl string) *StoreClient {
+ return &StoreClient{
+ BaseURL: baseUrl,
+ Client: &http.Client{},
+ }
+}
+
+// Implement Set method
+func (c *StoreClient) Set(req SetRequest) error {
+ return c.doRequest("stores/set", req)
+}
+
+// Implement Get method
+func (c *StoreClient) Get(req GetRequest) (*GetResponse, error) {
+ body, err := c.doRequestWithResponse("stores/get", req)
+ if err != nil {
+ return nil, err
+ }
+
+ var resp GetResponse
+ err = json.Unmarshal(body, &resp)
+ if err != nil {
+ return nil, err
+ }
+
+ return &resp, nil
+}
+
+// Implement Delete method
+func (c *StoreClient) Delete(req DeleteRequest) error {
+ return c.doRequest("stores/delete", req)
+}
+
+// Implement Find method
+func (c *StoreClient) Find(req FindRequest) (*FindResponse, error) {
+ body, err := c.doRequestWithResponse("stores/find", req)
+ if err != nil {
+ return nil, err
+ }
+
+ var resp FindResponse
+ err = json.Unmarshal(body, &resp)
+ if err != nil {
+ return nil, err
+ }
+
+ return &resp, nil
+}
+
+// Helper function to perform a request without expecting a response body
+func (c *StoreClient) doRequest(path string, data interface{}) error {
+ jsonData, err := json.Marshal(data)
+ if err != nil {
+ return err
+ }
+
+ req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
+ if err != nil {
+ return err
+ }
+ req.Header.Set("Content-Type", "application/json")
+
+ resp, err := c.Client.Do(req)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
+ }
+
+ return nil
+}
+
+// Helper function to perform a request and parse the response body
+func (c *StoreClient) doRequestWithResponse(path string, data interface{}) ([]byte, error) {
+ jsonData, err := json.Marshal(data)
+ if err != nil {
+ return nil, err
+ }
+
+ req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData))
+ if err != nil {
+ return nil, err
+ }
+ req.Header.Set("Content-Type", "application/json")
+
+ resp, err := c.Client.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return nil, fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode)
+ }
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ return body, nil
+}
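Since the client above is self-contained, usage is straightforward. A minimal sketch, assuming a running LocalAI instance serving the stores endpoints at localhost:8080 (the base URL is an assumption):

package main

import (
	"fmt"
	"log"

	"github.com/go-skynet/LocalAI/core/clients"
)

func main() {
	// Point the client at a running LocalAI server.
	c := clients.NewStoreClient("http://localhost:8080")

	// Store two vectors alongside their string payloads.
	if err := c.Set(clients.SetRequest{
		Keys:   [][]float32{{0.1, 0.2}, {0.9, 0.8}},
		Values: []string{"first", "second"},
	}); err != nil {
		log.Fatal(err)
	}

	// Fetch the single nearest neighbour of a query vector.
	res, err := c.Find(clients.FindRequest{TopK: 1, Key: []float32{0.1, 0.2}})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Values, res.Similarities)
}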
From bcaa320f3611deb3c897b51c0240a186b51f21ba Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Apr 2024 19:49:54 +0000
Subject: [PATCH 0170/2750] build(deps): bump the pip group across 4
directories with 8 updates (#2049)
Bumps the pip group with 1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain).
Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index).
Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory:
| Package | From | To |
| --- | --- | --- |
| [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` |
| [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` |
| [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` |
| [idna](https://github.com/kjd/idna) | `3.4` | `3.7` |
| [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` |
| [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` |
Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit).
Updates `langchain` from 0.0.234 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `langchain` from 0.0.160 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `llama-index` from 0.6.2 to 0.9.36
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36)
Updates `langchain` from 0.0.159 to 0.1.0
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0)
Updates `aiohttp` from 3.8.4 to 3.9.2
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2)
Updates `certifi` from 2022.12.7 to 2023.7.22
- [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22)
Updates `idna` from 3.4 to 3.7
- [Release notes](https://github.com/kjd/idna/releases)
- [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst)
- [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7)
Updates `requests` from 2.29.0 to 2.31.0
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0)
Updates `urllib3` from 1.26.15 to 1.26.18
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18)
Updates `streamlit` from 1.26.0 to 1.30.0
- [Release notes](https://github.com/streamlit/streamlit/releases)
- [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0)
---
updated-dependencies:
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: llama-index
dependency-type: direct:production
dependency-group: pip
- dependency-name: langchain
dependency-type: direct:production
dependency-group: pip
- dependency-name: aiohttp
dependency-type: direct:production
dependency-group: pip
- dependency-name: certifi
dependency-type: direct:production
dependency-group: pip
- dependency-name: idna
dependency-type: direct:production
dependency-group: pip
- dependency-name: requests
dependency-type: direct:production
dependency-group: pip
- dependency-name: urllib3
dependency-type: direct:production
dependency-group: pip
- dependency-name: streamlit
dependency-type: direct:production
dependency-group: pip
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
examples/functions/requirements.txt | 2 +-
examples/langchain-chroma/requirements.txt | 4 ++--
.../langchainpy-localai-example/requirements.txt | 12 ++++++------
examples/streamlit-bot/requirements.txt | 2 +-
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index 7164e011..759c5b03 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.0.234
+langchain==0.1.0
openai==0.27.8
diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index b9e649c5..cdf466b9 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.0.160
+langchain==0.1.0
openai==0.27.6
chromadb==0.3.21
-llama-index==0.6.2
\ No newline at end of file
+llama-index==0.9.36
\ No newline at end of file
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 2de5bcf0..1e63b0bf 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,16 +1,16 @@
-aiohttp==3.8.4
+aiohttp==3.9.2
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
-certifi==2022.12.7
+certifi==2023.7.22
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
-idna==3.4
-langchain==0.0.159
+idna==3.7
+langchain==0.1.0
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
@@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
-requests==2.29.0
+requests==2.31.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
-urllib3==1.26.15
+urllib3==1.26.18
yarl==1.9.2
diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt
index ae527c76..1fcd5093 100644
--- a/examples/streamlit-bot/requirements.txt
+++ b/examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.26.0
+streamlit==1.30.0
requests
\ No newline at end of file
From 6b06d4e0af4db7a8aa8e131ec2b3af171934862e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 16 Apr 2024 23:20:11 +0200
Subject: [PATCH 0171/2750] fix(fncall): fix regression introduced in #1963
(#2048)
Signed-off-by: Dave
---------
Signed-off-by: Ettore Di Giacinto
Signed-off-by: Dave
Co-authored-by: Dave
---
core/services/openai.go | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/core/services/openai.go b/core/services/openai.go
index 3fa041f5..7a2679ad 100644
--- a/core/services/openai.go
+++ b/core/services/openai.go
@@ -778,13 +778,16 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults
// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
ss := map[string]interface{}{}
// This prevent newlines to break JSON parsing for clients
- // s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(llmresult), &ss)
+ s := utils.EscapeNewLines(llmresult)
+ if err := json.Unmarshal([]byte(s), &ss); err != nil {
+ log.Error().Msgf("error unmarshalling JSON: %s", err.Error())
+ return results
+ }
// The grammar defines the function name as "function", while OpenAI returns "name"
func_name, ok := ss["function"]
if !ok {
- log.Debug().Msg("ss[function] is not OK!")
+ log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult)
return results
}
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
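To see why the restored escaping matters: encoding/json rejects raw control characters inside string literals, so a model reply containing real newlines fails to parse until they are escaped. A standalone sketch, where escapeNewLines approximates what utils.EscapeNewLines is assumed to do:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// escapeNewLines approximates utils.EscapeNewLines: turn raw newlines into
// the two-character sequence \n so string values stay legal JSON.
func escapeNewLines(s string) string {
	return strings.ReplaceAll(s, "\n", `\n`)
}

func main() {
	llmresult := "{\"function\": \"reply\", \"arguments\": \"line one\nline two\"}"

	ss := map[string]interface{}{}
	// The raw result fails: a literal newline inside a JSON string is invalid.
	if err := json.Unmarshal([]byte(llmresult), &ss); err != nil {
		fmt.Println("raw:", err)
	}
	// After escaping, parsing succeeds.
	if err := json.Unmarshal([]byte(escapeNewLines(llmresult)), &ss); err != nil {
		fmt.Println("escaped:", err)
		return
	}
	fmt.Println(ss["function"]) // reply
}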
From 5763dc161376c86e4611ee9b7be54073a4fccf5b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 16 Apr 2024 23:37:50 +0200
Subject: [PATCH 0172/2750] :arrow_up: Update ggerganov/whisper.cpp (#2050)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 7cde8fa7..f5b4dc2a 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81
+WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From af8c705ecd1ec47ca1254d7e7b8ab7ca7da89b57 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 17 Apr 2024 23:17:25 +0200
Subject: [PATCH 0173/2750] :arrow_up: Update ggerganov/whisper.cpp (#2060)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index f5b4dc2a..fdc7aade 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
-WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387
+WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
From af9e5a2d05d477eedaf1bff08370208d2b4a9d86 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 17 Apr 2024 23:33:49 +0200
Subject: [PATCH 0174/2750] Revert #1963 (#2056)
* Revert "fix(fncall): fix regression introduced in #1963 (#2048)"
This reverts commit 6b06d4e0af4db7a8aa8e131ec2b3af171934862e.
* Revert "fix: action-tmate back to upstream, dead code removal (#2038)"
This reverts commit fdec8a9d00a034ccd8e075008edd165147edf328.
* Revert "feat(grpc): return consumed token count and update response accordingly (#2035)"
This reverts commit e843d7df0e8b177ab122a9f7bfa7196274ccd204.
* Revert "refactor: backend/service split, channel-based llm flow (#1963)"
This reverts commit eed5706994a3e770a0194cad9d1cfd724ba1b10a.
* feat(grpc): return consumed token count and update response accordingly
Fixes: #1920
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/test.yml | 15 +-
Makefile | 18 +-
backend/go/transcribe/transcript.go | 6 +-
backend/go/transcribe/whisper.go | 2 +-
core/backend/embeddings.go | 90 +-
core/backend/image.go | 259 +-----
core/backend/llm.go | 271 ++----
core/backend/options.go | 84 +-
core/backend/transcript.go | 41 +-
core/backend/tts.go | 77 +-
core/cli/run.go | 8 +-
core/cli/transcript.go | 19 +-
core/cli/tts.go | 26 +-
core/config/backend_config.go | 301 ++++++-
core/config/backend_config_loader.go | 509 -----------
core/config/exports_test.go | 6 -
core/http/api.go | 227 ++---
core/http/api_test.go | 98 +--
core/http/ctx/fiber.go | 65 +-
core/http/endpoints/elevenlabs/tts.go | 39 +-
.../http/endpoints/localai/backend_monitor.go | 4 +-
core/http/endpoints/localai/tts.go | 39 +-
core/http/endpoints/openai/assistant.go | 2 +-
core/http/endpoints/openai/chat.go | 621 ++++++++++++--
core/http/endpoints/openai/completion.go | 163 +++-
core/http/endpoints/openai/edit.go | 78 +-
core/http/endpoints/openai/embeddings.go | 65 +-
core/http/endpoints/openai/image.go | 216 ++++-
core/http/endpoints/openai/inference.go | 55 ++
core/http/endpoints/openai/list.go | 52 +-
core/http/endpoints/openai/request.go | 285 ++++++
core/http/endpoints/openai/transcription.go | 28 +-
core/schema/{transcription.go => whisper.go} | 2 +-
core/services/backend_monitor.go | 30 +-
core/services/gallery.go | 116 +--
core/services/list_models.go | 72 --
core/services/openai.go | 808 ------------------
core/startup/startup.go | 91 +-
core/state.go | 41 -
.../llm text/-completions Stream.bru | 25 -
pkg/concurrency/concurrency.go | 135 ---
pkg/concurrency/concurrency_test.go | 101 ---
pkg/concurrency/types.go | 6 -
pkg/grpc/backend.go | 2 +-
pkg/grpc/base/base.go | 4 +-
pkg/grpc/client.go | 4 +-
pkg/grpc/embed.go | 4 +-
pkg/grpc/interface.go | 2 +-
pkg/model/initializers.go | 8 +-
pkg/startup/model_preload.go | 85 ++
.../startup}/model_preload_test.go | 5 +-
pkg/utils/base64.go | 50 --
52 files changed, 2295 insertions(+), 3065 deletions(-)
delete mode 100644 core/config/backend_config_loader.go
delete mode 100644 core/config/exports_test.go
create mode 100644 core/http/endpoints/openai/inference.go
create mode 100644 core/http/endpoints/openai/request.go
rename core/schema/{transcription.go => whisper.go} (90%)
delete mode 100644 core/services/list_models.go
delete mode 100644 core/services/openai.go
delete mode 100644 core/state.go
delete mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
delete mode 100644 pkg/concurrency/concurrency.go
delete mode 100644 pkg/concurrency/concurrency_test.go
delete mode 100644 pkg/concurrency/types.go
create mode 100644 pkg/startup/model_preload.go
rename {core/services => pkg/startup}/model_preload_test.go (96%)
delete mode 100644 pkg/utils/base64.go
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 156294b5..46c4e065 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,9 +121,8 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.18
- with:
- connect-timeout-seconds: 180
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
tests-aio-container:
runs-on: ubuntu-latest
@@ -174,9 +173,8 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.18
- with:
- connect-timeout-seconds: 180
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
tests-apple:
runs-on: macOS-14
@@ -209,6 +207,5 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.18
- with:
- connect-timeout-seconds: 180
\ No newline at end of file
+ uses: mxschmitt/action-tmate@v3
+ timeout-minutes: 5
\ No newline at end of file
diff --git a/Makefile b/Makefile
index fdc7aade..6715e91e 100644
--- a/Makefile
+++ b/Makefile
@@ -301,9 +301,6 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets
-halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually
- ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {}
-
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -368,13 +365,13 @@ run-e2e-image:
run-e2e-aio:
@echo 'Running e2e AIO tests'
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
teardown-e2e:
rm -rf $(TEST_DIR) || true
@@ -382,15 +379,15 @@ teardown-e2e:
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
- $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
+ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
@@ -648,10 +645,7 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
-# EXPERIMENTAL:
-ifeq ($(BUILD_TYPE),metal)
- cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/
-endif
+
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go
index b38d5b9f..fdfaa974 100644
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) {
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
- command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
+ command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := runCommand(command)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
@@ -29,8 +29,8 @@ func audioToWav(src, dst string) error {
return nil
}
-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
- res := schema.TranscriptionResult{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
+ res := schema.Result{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go
index a9a62d24..ac93be01 100644
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
return err
}
-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}
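The audioToWav helper touched above is a thin ffmpeg wrapper. A self-contained sketch of the same conversion, assuming ffmpeg is on PATH: resample any input to 16 kHz mono signed 16-bit PCM, the format whisper.cpp consumes.

package main

import (
	"fmt"
	"os/exec"
)

// audioToWav mirrors the helper in transcript.go: shell out to ffmpeg and
// resample the input to 16 kHz mono s16le WAV.
func audioToWav(src, dst string) error {
	out, err := exec.Command("ffmpeg",
		"-i", src, "-format", "s16le", "-ar", "16000",
		"-ac", "1", "-acodec", "pcm_s16le", dst).CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}

func main() {
	if err := audioToWav("input.ogg", "output.wav"); err != nil {
		fmt.Println(err)
	}
}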
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go
index 2c63dedc..03ff90b9 100644
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -2,100 +2,14 @@ package backend
import (
"fmt"
- "time"
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/google/uuid"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
)
-type EmbeddingsBackendService struct {
- ml *model.ModelLoader
- bcl *config.BackendConfigLoader
- appConfig *config.ApplicationConfig
-}
-
-func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService {
- return &EmbeddingsBackendService{
- ml: ml,
- bcl: bcl,
- appConfig: appConfig,
- }
-}
-
-func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
-
- resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- go func(request *schema.OpenAIRequest) {
- if request.Model == "" {
- request.Model = model.StableDiffusionBackend
- }
-
- bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- items := []schema.Item{}
-
- for i, s := range bc.InputToken {
- // get the model function to call for the result
- embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- embeddings, err := embedFn()
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
- }
-
- for i, s := range bc.InputStrings {
- // get the model function to call for the result
- embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- embeddings, err := embedFn()
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
- }
-
- id := uuid.New().String()
- created := int(time.Now().Unix())
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Data: items,
- Object: "list",
- }
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
- close(resultChannel)
- }(request)
- return resultChannel
-}
-
-func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model
grpcOpts := gRPCModelOpts(backendConfig)
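Both the removed service and the restored ModelEmbedding feed the same OpenAI-shaped response: one item per input, indexed in order. A reduced sketch with an illustrative Item type and a stub embed function standing in for the model call:

package main

import "fmt"

// Item mirrors the schema.Item shape used in the embeddings response.
type Item struct {
	Embedding []float32 `json:"embedding"`
	Index     int       `json:"index"`
	Object    string    `json:"object"`
}

// buildItems shows the loop the removed service performed: embed each input
// string and record it with its position.
func buildItems(inputs []string, embedFn func(string) ([]float32, error)) ([]Item, error) {
	items := []Item{}
	for i, s := range inputs {
		emb, err := embedFn(s)
		if err != nil {
			return nil, err
		}
		items = append(items, Item{Embedding: emb, Index: i, Object: "embedding"})
	}
	return items, nil
}

func main() {
	items, _ := buildItems([]string{"hello"}, func(s string) ([]float32, error) {
		return []float32{0.1, 0.2}, nil // stub embedding
	})
	fmt.Println(items)
}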
diff --git a/core/backend/image.go b/core/backend/image.go
index affb3bb3..b0cffb0b 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -1,252 +1,18 @@
package backend
import (
- "bufio"
- "encoding/base64"
- "fmt"
- "io"
- "net/http"
- "os"
- "path/filepath"
- "strconv"
- "strings"
- "time"
-
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/google/uuid"
- "github.com/rs/zerolog/log"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
)
-type ImageGenerationBackendService struct {
- ml *model.ModelLoader
- bcl *config.BackendConfigLoader
- appConfig *config.ApplicationConfig
- BaseUrlForGeneratedImages string
-}
-
-func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService {
- return &ImageGenerationBackendService{
- ml: ml,
- bcl: bcl,
- appConfig: appConfig,
- }
-}
-
-func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
- resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- go func(request *schema.OpenAIRequest) {
- bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- src := ""
- if request.File != "" {
-
- var fileData []byte
- // check if input.File is an URL, if so download it and save it
- // to a temporary file
- if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") {
- out, err := downloadFile(request.File)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)}
- close(resultChannel)
- return
- }
- defer os.RemoveAll(out)
-
- fileData, err = os.ReadFile(out)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)}
- close(resultChannel)
- return
- }
-
- } else {
- // base 64 decode the file and write it somewhere
- // that we will cleanup
- fileData, err = base64.StdEncoding.DecodeString(request.File)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- }
-
- // Create a temporary file
- outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64")
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- // write the base64 result
- writer := bufio.NewWriter(outputFile)
- _, err = writer.Write(fileData)
- if err != nil {
- outputFile.Close()
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- outputFile.Close()
- src = outputFile.Name()
- defer os.RemoveAll(src)
- }
-
- log.Debug().Msgf("Parameter Config: %+v", bc)
-
- switch bc.Backend {
- case "stablediffusion":
- bc.Backend = model.StableDiffusionBackend
- case "tinydream":
- bc.Backend = model.TinyDreamBackend
- case "":
- bc.Backend = model.StableDiffusionBackend
- if bc.Model == "" {
- bc.Model = "stablediffusion_assets" // TODO: check?
- }
- }
-
- sizeParts := strings.Split(request.Size, "x")
- if len(sizeParts) != 2 {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
- close(resultChannel)
- return
- }
- width, err := strconv.Atoi(sizeParts[0])
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
- close(resultChannel)
- return
- }
- height, err := strconv.Atoi(sizeParts[1])
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")}
- close(resultChannel)
- return
- }
-
- b64JSON := false
- if request.ResponseFormat.Type == "b64_json" {
- b64JSON = true
- }
- // src and clip_skip
- var result []schema.Item
- for _, i := range bc.PromptStrings {
- n := request.N
- if request.N == 0 {
- n = 1
- }
- for j := 0; j < n; j++ {
- prompts := strings.Split(i, "|")
- positive_prompt := prompts[0]
- negative_prompt := ""
- if len(prompts) > 1 {
- negative_prompt = prompts[1]
- }
-
- mode := 0
- step := bc.Step
- if step == 0 {
- step = 15
- }
-
- if request.Mode != 0 {
- mode = request.Mode
- }
-
- if request.Step != 0 {
- step = request.Step
- }
-
- tempDir := ""
- if !b64JSON {
- tempDir = igbs.appConfig.ImageDir
- }
- // Create a temporary file
- outputFile, err := os.CreateTemp(tempDir, "b64")
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- outputFile.Close()
- output := outputFile.Name() + ".png"
- // Rename the temporary file
- err = os.Rename(outputFile.Name(), output)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- if request.Seed == nil {
- zVal := 0 // Idiomatic way to do this? Actually needed?
- request.Seed = &zVal
- }
-
- fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- if err := fn(); err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
-
- item := &schema.Item{}
-
- if b64JSON {
- defer os.RemoveAll(output)
- data, err := os.ReadFile(output)
- if err != nil {
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
- close(resultChannel)
- return
- }
- item.B64JSON = base64.StdEncoding.EncodeToString(data)
- } else {
- base := filepath.Base(output)
- item.URL = igbs.BaseUrlForGeneratedImages + base
- }
-
- result = append(result, *item)
- }
- }
-
- id := uuid.New().String()
- created := int(time.Now().Unix())
- resp := &schema.OpenAIResponse{
- ID: id,
- Created: created,
- Data: result,
- }
- resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
- close(resultChannel)
- }(request)
- return resultChannel
-}
-
-func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
threads := backendConfig.Threads
if *threads == 0 && appConfig.Threads != 0 {
threads = &appConfig.Threads
}
-
gRPCOpts := gRPCModelOpts(backendConfig)
-
opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(backendConfig.Backend),
model.WithAssetDir(appConfig.AssetsDestination),
@@ -284,24 +50,3 @@ func imageGeneration(height, width, mode, step, seed int, positive_prompt, negat
return fn, nil
}
-
-// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change.
-func downloadFile(url string) (string, error) {
- // Get the data
- resp, err := http.Get(url)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- // Create the file
- out, err := os.CreateTemp("", "image")
- if err != nil {
- return "", err
- }
- defer out.Close()
-
- // Write the body to file
- _, err = io.Copy(out, resp.Body)
- return out.Name(), err
-}
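One piece of the removed handler worth keeping in mind for the restored endpoint code: an OpenAI-style size parameter such as "512x768" is split on "x" and both halves must parse as integers. A standalone sketch of that validation:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseSize reproduces the size handling from the removed GenerateImage:
// reject anything that is not two integers joined by "x".
func parseSize(size string) (width, height int, err error) {
	parts := strings.Split(size, "x")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid value for 'size'")
	}
	if width, err = strconv.Atoi(parts[0]); err != nil {
		return 0, 0, fmt.Errorf("invalid value for 'size'")
	}
	if height, err = strconv.Atoi(parts[1]); err != nil {
		return 0, 0, fmt.Errorf("invalid value for 'size'")
	}
	return width, height, nil
}

func main() {
	w, h, err := parseSize("512x768")
	fmt.Println(w, h, err) // 512 768 <nil>
}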
diff --git a/core/backend/llm.go b/core/backend/llm.go
index 75766d78..a4d1e5f3 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -11,22 +11,17 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/rs/zerolog/log"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
-type LLMRequest struct {
- Id int // TODO Remove if not used.
- Text string
- Images []string
- RawMessages []schema.Message
- // TODO: Other Modalities?
+type LLMResponse struct {
+ Response string // should this be []byte?
+ Usage TokenUsage
}
type TokenUsage struct {
@@ -34,94 +29,57 @@ type TokenUsage struct {
Completion int
}
-type LLMResponse struct {
- Request *LLMRequest
- Response string // should this be []byte?
- Usage TokenUsage
-}
-
-// TODO: Does this belong here or in core/services/openai.go?
-type LLMResponseBundle struct {
- Request *schema.OpenAIRequest
- Response []schema.Choice
- Usage TokenUsage
-}
-
-type LLMBackendService struct {
- bcl *config.BackendConfigLoader
- ml *model.ModelLoader
- appConfig *config.ApplicationConfig
- ftMutex sync.Mutex
- cutstrings map[string]*regexp.Regexp
-}
-
-func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService {
- return &LLMBackendService{
- bcl: bcl,
- ml: ml,
- appConfig: appConfig,
- ftMutex: sync.Mutex{},
- cutstrings: make(map[string]*regexp.Regexp),
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+ modelFile := c.Model
+ threads := c.Threads
+ if *threads == 0 && o.Threads != 0 {
+ threads = &o.Threads
}
-}
-
-// TODO: Should ctx param be removed and replaced with hardcoded req.Context?
-func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) (
- resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) {
-
- threads := bc.Threads
- if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 {
- threads = &llmbs.appConfig.Threads
- }
-
- grpcOpts := gRPCModelOpts(bc)
+ grpcOpts := gRPCModelOpts(c)
var inferenceModel grpc.Backend
+ var err error
- opts := modelOpts(bc, llmbs.appConfig, []model.Option{
+ opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
- model.WithAssetDir(llmbs.appConfig.AssetsDestination),
- model.WithModel(bc.Model),
- model.WithContext(llmbs.appConfig.Context),
+ model.WithAssetDir(o.AssetsDestination),
+ model.WithModel(modelFile),
+ model.WithContext(o.Context),
})
- if bc.Backend != "" {
- opts = append(opts, model.WithBackendString(bc.Backend))
+ if c.Backend != "" {
+ opts = append(opts, model.WithBackendString(c.Backend))
}
- // Check if bc.Model exists, if it doesn't try to load it from the gallery
- if llmbs.appConfig.AutoloadGalleries { // experimental
- if _, err := os.Stat(bc.Model); os.IsNotExist(err) {
+ // Check if the modelFile exists, if it doesn't try to load it from the gallery
+ if o.AutoloadGalleries { // experimental
+ if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
- err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+ err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
- return nil, nil, err
+ return nil, err
}
}
}
- if bc.Backend == "" {
- log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model)
- inferenceModel, err = llmbs.ml.GreedyLoader(opts...)
+ if c.Backend == "" {
+ inferenceModel, err = loader.GreedyLoader(opts...)
} else {
- inferenceModel, err = llmbs.ml.BackendLoader(opts...)
+ inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
- log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend")
- return
+ return nil, err
}
- grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath)
- grpcPredOpts.Prompt = req.Text
- grpcPredOpts.Images = req.Images
-
- if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" {
- grpcPredOpts.UseTokenizerTemplate = true
- protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages))
- for i, message := range req.RawMessages {
+ var protoMessages []*proto.Message
+ // if we are using the tokenizer template, we need to convert the messages to proto messages
+ // unless the prompt has already been tokenized (non-chat endpoints + functions)
+ if c.TemplateConfig.UseTokenizerTemplate && s == "" {
+ protoMessages = make([]*proto.Message, len(messages), len(messages))
+ for i, message := range messages {
protoMessages[i] = &proto.Message{
Role: message.Role,
}
@@ -129,32 +87,47 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
case string:
protoMessages[i].Content = ct
default:
- err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
- return
+ return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
}
}
}
- tokenUsage := TokenUsage{}
+ // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
+ fn := func() (LLMResponse, error) {
+ opts := gRPCPredictOpts(c, loader.ModelPath)
+ opts.Prompt = s
+ opts.Messages = protoMessages
+ opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
+ opts.Images = images
- promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts)
- if pErr == nil && promptInfo.Length > 0 {
- tokenUsage.Prompt = int(promptInfo.Length)
- }
+ tokenUsage := TokenUsage{}
- rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse])
- // TODO this next line is the biggest argument for taking named return values _back_ out!!!
- var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse]
+ // check the per-model feature flag for usage, since tokenCallback may have a cost.
+ // Defaults to off as for now it is still experimental
+ if c.FeatureFlag.Enabled("usage") {
+ userTokenCallback := tokenCallback
+ if userTokenCallback == nil {
+ userTokenCallback = func(token string, usage TokenUsage) bool {
+ return true
+ }
+ }
- if enableTokenChannel {
- rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse])
+ promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
+ if pErr == nil && promptInfo.Length > 0 {
+ tokenUsage.Prompt = int(promptInfo.Length)
+ }
- // TODO Needs better name
- ss := ""
+ tokenCallback = func(token string, usage TokenUsage) bool {
+ tokenUsage.Completion++
+ return userTokenCallback(token, tokenUsage)
+ }
+ }
+
+ if tokenCallback != nil {
+ ss := ""
- go func() {
var partialRune []byte
- err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) {
+ err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
partialRune = append(partialRune, chars...)
for len(partialRune) > 0 {
@@ -164,126 +137,54 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
break
}
- tokenUsage.Completion++
- rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
- Response: string(r),
- Usage: tokenUsage,
- }}
-
+ tokenCallback(string(r), tokenUsage)
ss += string(r)
partialRune = partialRune[size:]
}
})
- close(rawTokenChannel)
+ return LLMResponse{
+ Response: ss,
+ Usage: tokenUsage,
+ }, err
+ } else {
+ // TODO: Is the chicken bit the only way to get here? is that acceptable?
+ reply, err := inferenceModel.Predict(ctx, opts)
if err != nil {
- rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
- } else {
- rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
- Response: ss,
- Usage: tokenUsage,
- }}
+ return LLMResponse{}, err
}
- close(rawResultChannel)
- }()
- } else {
- go func() {
- reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
if tokenUsage.Prompt == 0 {
tokenUsage.Prompt = int(reply.PromptTokens)
}
if tokenUsage.Completion == 0 {
tokenUsage.Completion = int(reply.Tokens)
}
- if err != nil {
- rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
- close(rawResultChannel)
- } else {
- rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{
- Response: string(reply.Message),
- Usage: tokenUsage,
- }}
- close(rawResultChannel)
- }
- }()
+ return LLMResponse{
+ Response: string(reply.Message),
+ Usage: tokenUsage,
+ }, err
+ }
}
- resultChannel = rawResultChannel
- tokenChannel = rawTokenChannel
- return
+ return fn, nil
}
-// TODO: Should predInput be a seperate param still, or should this fn handle extracting it from request??
-func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig,
- mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) (
- // Returns:
- resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) {
+var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
+var mu sync.Mutex = sync.Mutex{}
- rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle])
- resultChannel = rawChannel
-
- if request.N == 0 { // number of completions to return
- request.N = 1
- }
- images := []string{}
- for _, m := range request.Messages {
- images = append(images, m.StringImages...)
- }
-
- for i := 0; i < request.N; i++ {
-
- individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{
- Text: predInput,
- Images: images,
- RawMessages: request.Messages,
- }, bc, enableTokenChannels)
- if infErr != nil {
- err = infErr // Avoids complaints about redeclaring err but looks dumb
- return
- }
- completionChannels = append(completionChannels, individualResultChannel)
- tokenChannels = append(tokenChannels, tokenChannel)
- }
-
- go func() {
- initialBundle := LLMResponseBundle{
- Request: request,
- Response: []schema.Choice{},
- Usage: TokenUsage{},
- }
-
- wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] {
- if iv.Error != nil {
- ov.Error = iv.Error
- // TODO: Decide if we should wipe partials or not?
- return ov
- }
- ov.Value.Usage.Prompt += iv.Value.Usage.Prompt
- ov.Value.Usage.Completion += iv.Value.Usage.Completion
-
- ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value))
- return ov
- }, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true)
- wg.Wait()
-
- }()
-
- return
-}
-
-func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string {
+func Finetune(config config.BackendConfig, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
- llmbs.ftMutex.Lock()
- reg, ok := llmbs.cutstrings[c]
+ mu.Lock()
+ reg, ok := cutstrings[c]
if !ok {
- llmbs.cutstrings[c] = regexp.MustCompile(c)
- reg = llmbs.cutstrings[c]
+ cutstrings[c] = regexp.MustCompile(c)
+ reg = cutstrings[c]
}
- llmbs.ftMutex.Unlock()
+ mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
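The Finetune hunk above returns to package-level state: a lazily-filled cache of compiled cutstring patterns guarded by a mutex, so each regexp is compiled once and reused across calls. A minimal sketch of that pattern:

package main

import (
	"fmt"
	"regexp"
	"sync"
)

// The same shape as the restored cache: compile each pattern on first use
// and serialize map access with a mutex.
var (
	cutstrings = make(map[string]*regexp.Regexp)
	mu         sync.Mutex
)

func compiled(pattern string) *regexp.Regexp {
	mu.Lock()
	defer mu.Unlock()
	reg, ok := cutstrings[pattern]
	if !ok {
		reg = regexp.MustCompile(pattern)
		cutstrings[pattern] = reg
	}
	return reg
}

func main() {
	// Strip an end-of-text marker, the kind of cutstring a config might list.
	fmt.Println(compiled(`<\|.*?\|>`).ReplaceAllString("hello <|end|>", ""))
}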
diff --git a/core/backend/options.go b/core/backend/options.go
index 0b4e56db..5b303b05 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -10,7 +10,7 @@ import (
model "github.com/go-skynet/LocalAI/pkg/model"
)
-func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
if so.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
}
@@ -19,12 +19,12 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
opts = append(opts, model.EnableParallelRequests)
}
- if bc.GRPC.Attempts != 0 {
- opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts))
+ if c.GRPC.Attempts != 0 {
+ opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
- if bc.GRPC.AttemptsSleepTime != 0 {
- opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime))
+ if c.GRPC.AttemptsSleepTime != 0 {
+ opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range so.ExternalGRPCBackends {
@@ -34,7 +34,7 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
return opts
}
-func getSeed(c *config.BackendConfig) int32 {
+func getSeed(c config.BackendConfig) int32 {
seed := int32(*c.Seed)
if seed == config.RAND_SEED {
seed = rand.Int31()
@@ -43,7 +43,7 @@ func getSeed(c *config.BackendConfig) int32 {
return seed
}
-func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
+func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
@@ -104,47 +104,47 @@ func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
}
}
-func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions {
+func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
promptCachePath := ""
- if bc.PromptCachePath != "" {
- p := filepath.Join(modelPath, bc.PromptCachePath)
+ if c.PromptCachePath != "" {
+ p := filepath.Join(modelPath, c.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
promptCachePath = p
}
return &pb.PredictOptions{
- Temperature: float32(*bc.Temperature),
- TopP: float32(*bc.TopP),
- NDraft: bc.NDraft,
- TopK: int32(*bc.TopK),
- Tokens: int32(*bc.Maxtokens),
- Threads: int32(*bc.Threads),
- PromptCacheAll: bc.PromptCacheAll,
- PromptCacheRO: bc.PromptCacheRO,
+ Temperature: float32(*c.Temperature),
+ TopP: float32(*c.TopP),
+ NDraft: c.NDraft,
+ TopK: int32(*c.TopK),
+ Tokens: int32(*c.Maxtokens),
+ Threads: int32(*c.Threads),
+ PromptCacheAll: c.PromptCacheAll,
+ PromptCacheRO: c.PromptCacheRO,
PromptCachePath: promptCachePath,
- F16KV: *bc.F16,
- DebugMode: *bc.Debug,
- Grammar: bc.Grammar,
- NegativePromptScale: bc.NegativePromptScale,
- RopeFreqBase: bc.RopeFreqBase,
- RopeFreqScale: bc.RopeFreqScale,
- NegativePrompt: bc.NegativePrompt,
- Mirostat: int32(*bc.LLMConfig.Mirostat),
- MirostatETA: float32(*bc.LLMConfig.MirostatETA),
- MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
- Debug: *bc.Debug,
- StopPrompts: bc.StopWords,
- Repeat: int32(bc.RepeatPenalty),
- NKeep: int32(bc.Keep),
- Batch: int32(bc.Batch),
- IgnoreEOS: bc.IgnoreEOS,
- Seed: getSeed(bc),
- FrequencyPenalty: float32(bc.FrequencyPenalty),
- MLock: *bc.MMlock,
- MMap: *bc.MMap,
- MainGPU: bc.MainGPU,
- TensorSplit: bc.TensorSplit,
- TailFreeSamplingZ: float32(*bc.TFZ),
- TypicalP: float32(*bc.TypicalP),
+ F16KV: *c.F16,
+ DebugMode: *c.Debug,
+ Grammar: c.Grammar,
+ NegativePromptScale: c.NegativePromptScale,
+ RopeFreqBase: c.RopeFreqBase,
+ RopeFreqScale: c.RopeFreqScale,
+ NegativePrompt: c.NegativePrompt,
+ Mirostat: int32(*c.LLMConfig.Mirostat),
+ MirostatETA: float32(*c.LLMConfig.MirostatETA),
+ MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
+ Debug: *c.Debug,
+ StopPrompts: c.StopWords,
+ Repeat: int32(c.RepeatPenalty),
+ NKeep: int32(c.Keep),
+ Batch: int32(c.Batch),
+ IgnoreEOS: c.IgnoreEOS,
+ Seed: getSeed(c),
+ FrequencyPenalty: float32(c.FrequencyPenalty),
+ MLock: *c.MMlock,
+ MMap: *c.MMap,
+ MainGPU: c.MainGPU,
+ TensorSplit: c.TensorSplit,
+ TailFreeSamplingZ: float32(*c.TFZ),
+ TypicalP: float32(*c.TypicalP),
}
}
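
Throughout options.go the helpers now take config.BackendConfig by value rather than by pointer, so each call works on its own copy and cannot mutate the caller's config. A minimal sketch of the semantic difference, using an illustrative struct:

    package sketch

    type BackendConfig struct{ Batch int }

    // byValue receives a copy; the assignment is invisible to the caller.
    func byValue(c BackendConfig) { c.Batch = 512 }

    // byPointer receives the caller's struct and mutates it in place.
    func byPointer(c *BackendConfig) { c.Batch = 512 }

    func demo() (int, int) {
        a := BackendConfig{Batch: 1}
        byValue(a)
        b := BackendConfig{Batch: 1}
        byPointer(&b)
        return a.Batch, b.Batch // 1, 512
    }
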
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 6761c2ac..4c3859df 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -7,48 +7,11 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
)
-type TranscriptionBackendService struct {
- ml *model.ModelLoader
- bcl *config.BackendConfigLoader
- appConfig *config.ApplicationConfig
-}
-
-func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService {
- return &TranscriptionBackendService{
- ml: ml,
- bcl: bcl,
- appConfig: appConfig,
- }
-}
-
-func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] {
- responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult])
- go func(request *schema.OpenAIRequest) {
- bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig)
- if err != nil {
- responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request:%w", err)}
- close(responseChannel)
- return
- }
-
- tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig)
- if err != nil {
- responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err}
- close(responseChannel)
- return
- }
- responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr}
- close(responseChannel)
- }(request)
- return responseChannel
-}
-
-func modelTranscription(audio, language string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(model.WhisperBackend),
diff --git a/core/backend/tts.go b/core/backend/tts.go
index d1fa270d..f97b6202 100644
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -7,60 +7,29 @@ import (
"path/filepath"
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- "github.com/go-skynet/LocalAI/pkg/model"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
-type TextToSpeechBackendService struct {
- ml *model.ModelLoader
- bcl *config.BackendConfigLoader
- appConfig *config.ApplicationConfig
-}
+func generateUniqueFileName(dir, baseName, ext string) string {
+ counter := 1
+ fileName := baseName + ext
-func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService {
- return &TextToSpeechBackendService{
- ml: ml,
- bcl: bcl,
- appConfig: appConfig,
+ for {
+ filePath := filepath.Join(dir, fileName)
+ _, err := os.Stat(filePath)
+ if os.IsNotExist(err) {
+ return fileName
+ }
+
+ counter++
+ fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}
-func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] {
- responseChannel := make(chan concurrency.ErrorOr[*string])
- go func(request *schema.TTSRequest) {
- cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath,
- config.LoadOptionDebug(ttsbs.appConfig.Debug),
- config.LoadOptionThreads(ttsbs.appConfig.Threads),
- config.LoadOptionContextSize(ttsbs.appConfig.ContextSize),
- config.LoadOptionF16(ttsbs.appConfig.F16),
- )
- if err != nil {
- responseChannel <- concurrency.ErrorOr[*string]{Error: err}
- close(responseChannel)
- return
- }
-
- if request.Backend != "" {
- cfg.Backend = request.Backend
- }
-
- outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg)
- if err != nil {
- responseChannel <- concurrency.ErrorOr[*string]{Error: err}
- close(responseChannel)
- return
- }
- responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile}
- close(responseChannel)
- }(request)
- return responseChannel
-}
-
-func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
@@ -68,7 +37,7 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model
grpcOpts := gRPCModelOpts(backendConfig)
- opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{
+ opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
@@ -118,19 +87,3 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model
return filePath, res, err
}
-
-func generateUniqueFileName(dir, baseName, ext string) string {
- counter := 1
- fileName := baseName + ext
-
- for {
- filePath := filepath.Join(dir, fileName)
- _, err := os.Stat(filePath)
- if os.IsNotExist(err) {
- return fileName
- }
-
- counter++
- fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
- }
-}
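
generateUniqueFileName, moved to the top of tts.go above, probes the target directory until it finds an unused name. A hedged usage sketch follows; nextOutputPath is hypothetical and would live in the same package, since the helper is unexported, and the base name and extension are placeholders.

    package backend

    import "path/filepath"

    // nextOutputPath returns "<dir>/piper.wav" if that name is free,
    // otherwise "<dir>/piper_2.wav", "<dir>/piper_3.wav", and so on.
    func nextOutputPath(audioDir string) string {
        return filepath.Join(audioDir, generateUniqueFileName(audioDir, "piper", ".wav"))
    }
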
diff --git a/core/cli/run.go b/core/cli/run.go
index cafc0b54..0f3ba2de 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error {
}
if r.PreloadBackendOnly {
- _, err := startup.Startup(opts...)
+ _, _, _, err := startup.Startup(opts...)
return err
}
- application, err := startup.Startup(opts...)
+ cl, ml, options, err := startup.Startup(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
@@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error {
// Watch the configuration directory
// If the directory does not exist, we don't watch it
if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
- closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig)
+ closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
defer closeConfigWatcherFn()
if err != nil {
@@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error {
}
}
- appHTTP, err := http.App(application)
+ appHTTP, err := http.App(cl, ml, options)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
return err
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
index f14a1a87..9f36a77c 100644
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -7,7 +7,6 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
)
@@ -44,21 +43,11 @@ func (t *TranscriptCMD) Run(ctx *Context) error {
defer ml.StopAllGRPC()
- tbs := backend.NewTranscriptionBackendService(ml, cl, opts)
-
- resultChannel := tbs.Transcribe(&schema.OpenAIRequest{
- PredictionOptions: schema.PredictionOptions{
- Language: t.Language,
- },
- File: t.Filename,
- })
-
- r := <-resultChannel
-
- if r.Error != nil {
- return r.Error
+ tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
+ if err != nil {
+ return err
}
- for _, segment := range r.Value.Segments {
+ for _, segment := range tr.Segments {
fmt.Println(segment.Start.String(), "-", segment.Text)
}
return nil
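
The transcript CLI now makes one synchronous call instead of reading from the old service's result channel. A self-contained sketch of the same call pattern; the file name, language, and the transcribe wrapper are placeholders.

    package sketch

    import (
        "fmt"

        "github.com/go-skynet/LocalAI/core/backend"
        "github.com/go-skynet/LocalAI/core/config"
        "github.com/go-skynet/LocalAI/pkg/model"
    )

    // transcribe mirrors the flow in core/cli/transcript.go above.
    func transcribe(ml *model.ModelLoader, cfg config.BackendConfig, appConfig *config.ApplicationConfig) error {
        tr, err := backend.ModelTranscription("audio.wav", "en", ml, cfg, appConfig)
        if err != nil {
            return err
        }
        for _, segment := range tr.Segments {
            fmt.Println(segment.Start.String(), "-", segment.Text)
        }
        return nil
    }
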
diff --git a/core/cli/tts.go b/core/cli/tts.go
index c7758c48..1d8fd3a3 100644
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -9,7 +9,6 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
)
@@ -43,29 +42,20 @@ func (t *TTSCMD) Run(ctx *Context) error {
defer ml.StopAllGRPC()
- ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts)
+ options := config.BackendConfig{}
+ options.SetDefaults()
- request := &schema.TTSRequest{
- Model: t.Model,
- Input: text,
- Backend: t.Backend,
- Voice: t.Voice,
- }
-
- resultsChannel := ttsbs.TextToAudioFile(request)
-
- rawResult := <-resultsChannel
-
- if rawResult.Error != nil {
- return rawResult.Error
+ filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
+ if err != nil {
+ return err
}
if outputFile != "" {
- if err := os.Rename(*rawResult.Value, outputFile); err != nil {
+ if err := os.Rename(filePath, outputFile); err != nil {
return err
}
- fmt.Printf("Generated file %q\n", outputFile)
+ fmt.Printf("Generate file %s\n", outputFile)
} else {
- fmt.Printf("Generated file %q\n", *rawResult.Value)
+ fmt.Printf("Generate file %s\n", filePath)
}
return nil
}
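
Likewise, the TTS CLI now calls backend.ModelTTS directly with a default-initialized BackendConfig. A sketch under the same assumptions; the backend, text, and model file values are placeholders.

    package sketch

    import (
        "github.com/go-skynet/LocalAI/core/backend"
        "github.com/go-skynet/LocalAI/core/config"
        "github.com/go-skynet/LocalAI/pkg/model"
    )

    // say renders text to an audio file and returns the file's path.
    func say(ml *model.ModelLoader, appConfig *config.ApplicationConfig) (string, error) {
        cfg := config.BackendConfig{}
        cfg.SetDefaults()
        filePath, _, err := backend.ModelTTS("piper", "hello world", "en-us-model.onnx", "", ml, appConfig, cfg)
        return filePath, err
    }
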
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 47e4829d..81c92d01 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -1,7 +1,22 @@
package config
import (
+ "errors"
+ "fmt"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+ "sync"
+
"github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/downloader"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
+ "gopkg.in/yaml.v3"
+
+ "github.com/charmbracelet/glamour"
)
const (
@@ -184,7 +199,7 @@ func (c *BackendConfig) FunctionToCall() string {
}
func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
- lo := &ConfigLoaderOptions{}
+ lo := &LoadOptions{}
lo.Apply(opts...)
ctx := lo.ctxSize
@@ -297,3 +312,287 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.Debug = &trueV
}
}
+
+////// Config Loader ////////
+
+type BackendConfigLoader struct {
+ configs map[string]BackendConfig
+ sync.Mutex
+}
+
+type LoadOptions struct {
+ debug bool
+ threads, ctxSize int
+ f16 bool
+}
+
+func LoadOptionDebug(debug bool) ConfigLoaderOption {
+ return func(o *LoadOptions) {
+ o.debug = debug
+ }
+}
+
+func LoadOptionThreads(threads int) ConfigLoaderOption {
+ return func(o *LoadOptions) {
+ o.threads = threads
+ }
+}
+
+func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
+ return func(o *LoadOptions) {
+ o.ctxSize = ctxSize
+ }
+}
+
+func LoadOptionF16(f16 bool) ConfigLoaderOption {
+ return func(o *LoadOptions) {
+ o.f16 = f16
+ }
+}
+
+type ConfigLoaderOption func(*LoadOptions)
+
+func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
+ for _, l := range options {
+ l(lo)
+ }
+}
+
+// Load a config file for a model
+func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+
+ // Load a config file named after the model, if present
+ cfg := &BackendConfig{
+ PredictionOptions: schema.PredictionOptions{
+ Model: modelName,
+ },
+ }
+
+ cfgExisting, exists := cl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ } else {
+ // Try loading a model config file
+ modelConfig := filepath.Join(modelPath, modelName+".yaml")
+ if _, err := os.Stat(modelConfig); err == nil {
+ if err := cl.LoadBackendConfig(
+ modelConfig, opts...,
+ ); err != nil {
+ return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+ }
+ cfgExisting, exists = cl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ }
+ }
+ }
+
+ cfg.SetDefaults(opts...)
+
+ return cfg, nil
+}
+
+func NewBackendConfigLoader() *BackendConfigLoader {
+ return &BackendConfigLoader{
+ configs: make(map[string]BackendConfig),
+ }
+}
+func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+ c := &[]*BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ for _, cc := range *c {
+ cc.SetDefaults(opts...)
+ }
+
+ return *c, nil
+}
+
+func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+ lo := &LoadOptions{}
+ lo.Apply(opts...)
+
+ c := &BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ c.SetDefaults(opts...)
+ return c, nil
+}
+
+func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
+ cm.Lock()
+ defer cm.Unlock()
+ c, err := ReadBackendConfigFile(file, opts...)
+ if err != nil {
+ return fmt.Errorf("cannot load config file: %w", err)
+ }
+
+ for _, cc := range c {
+ cm.configs[cc.Name] = *cc
+ }
+ return nil
+}
+
+func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
+ cl.Lock()
+ defer cl.Unlock()
+ c, err := ReadBackendConfig(file, opts...)
+ if err != nil {
+ return fmt.Errorf("cannot read config file: %w", err)
+ }
+
+ cl.configs[c.Name] = *c
+ return nil
+}
+
+func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+ cl.Lock()
+ defer cl.Unlock()
+ v, exists := cl.configs[m]
+ return v, exists
+}
+
+func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+ cl.Lock()
+ defer cl.Unlock()
+ var res []BackendConfig
+ for _, v := range cl.configs {
+ res = append(res, v)
+ }
+
+ sort.SliceStable(res, func(i, j int) bool {
+ return res[i].Name < res[j].Name
+ })
+
+ return res
+}
+
+func (cl *BackendConfigLoader) ListBackendConfigs() []string {
+ cl.Lock()
+ defer cl.Unlock()
+ var res []string
+ for k := range cl.configs {
+ res = append(res, k)
+ }
+ return res
+}
+
+// Preload prepares models that are not local but referenced by URL or huggingface repositories
+func (cl *BackendConfigLoader) Preload(modelPath string) error {
+ cl.Lock()
+ defer cl.Unlock()
+
+ status := func(fileName, current, total string, percent float64) {
+ utils.DisplayDownloadFunction(fileName, current, total, percent)
+ }
+
+ log.Info().Msgf("Preloading models from %s", modelPath)
+
+ renderMode := "dark"
+ if os.Getenv("COLOR") != "" {
+ renderMode = os.Getenv("COLOR")
+ }
+
+ glamText := func(t string) {
+ out, err := glamour.Render(t, renderMode)
+ if err == nil && os.Getenv("NO_COLOR") == "" {
+ fmt.Println(out)
+ } else {
+ fmt.Println(t)
+ }
+ }
+
+ for i, config := range cl.configs {
+
+ // Download files and verify their SHA
+ for _, file := range config.DownloadFiles {
+ log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
+
+ if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
+ return err
+ }
+ // Create file path
+ filePath := filepath.Join(modelPath, file.Filename)
+
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+ return err
+ }
+ }
+
+ modelURL := config.PredictionOptions.Model
+ modelURL = downloader.ConvertURL(modelURL)
+
+ if downloader.LooksLikeURL(modelURL) {
+ // md5 of model name
+ md5Name := utils.MD5(modelURL)
+
+ // check if file exists
+ if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+ if err != nil {
+ return err
+ }
+ }
+
+ cc := cl.configs[i]
+ c := &cc
+ c.PredictionOptions.Model = md5Name
+ cl.configs[i] = *c
+ }
+ if cl.configs[i].Name != "" {
+ glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
+ }
+ if cl.configs[i].Description != "" {
+ //glamText("**Description**")
+ glamText(cl.configs[i].Description)
+ }
+ if cl.configs[i].Usage != "" {
+ //glamText("**Usage**")
+ glamText(cl.configs[i].Usage)
+ }
+ }
+ return nil
+}
+
+// LoadBackendConfigsFromPath reads all the configurations of the models from a path
+// (non-recursive)
+func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
+ cm.Lock()
+ defer cm.Unlock()
+ entries, err := os.ReadDir(path)
+ if err != nil {
+ return err
+ }
+ files := make([]fs.FileInfo, 0, len(entries))
+ for _, entry := range entries {
+ info, err := entry.Info()
+ if err != nil {
+ return err
+ }
+ files = append(files, info)
+ }
+ for _, file := range files {
+ // Skip anything that is not a YAML config file (templates, .keep files, etc.)
+ if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
+ continue
+ }
+ c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
+ if err == nil {
+ cm.configs[c.Name] = *c
+ }
+ }
+
+ return nil
+}
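
The loader and its functional options (LoadOptions, ConfigLoaderOption) now live in backend_config.go itself. A short usage sketch of the options pattern defined above; the model name, path, and option values are placeholders.

    package sketch

    import "github.com/go-skynet/LocalAI/core/config"

    // loadModelConfig resolves a model's BackendConfig, applying defaults
    // for anything its YAML file does not set.
    func loadModelConfig() (*config.BackendConfig, error) {
        cl := config.NewBackendConfigLoader()
        return cl.LoadBackendConfigFileByName("gpt4all", "/models",
            config.LoadOptionDebug(true),
            config.LoadOptionThreads(4),
            config.LoadOptionContextSize(512),
            config.LoadOptionF16(false),
        )
    }
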
diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go
deleted file mode 100644
index 62dfc1e0..00000000
--- a/core/config/backend_config_loader.go
+++ /dev/null
@@ -1,509 +0,0 @@
-package config
-
-import (
- "encoding/json"
- "errors"
- "fmt"
- "io/fs"
- "os"
- "path/filepath"
- "sort"
- "strings"
- "sync"
-
- "github.com/charmbracelet/glamour"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/downloader"
- "github.com/go-skynet/LocalAI/pkg/grammar"
- "github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/rs/zerolog/log"
- "gopkg.in/yaml.v2"
-)
-
-type BackendConfigLoader struct {
- configs map[string]BackendConfig
- sync.Mutex
-}
-
-type ConfigLoaderOptions struct {
- debug bool
- threads, ctxSize int
- f16 bool
-}
-
-func LoadOptionDebug(debug bool) ConfigLoaderOption {
- return func(o *ConfigLoaderOptions) {
- o.debug = debug
- }
-}
-
-func LoadOptionThreads(threads int) ConfigLoaderOption {
- return func(o *ConfigLoaderOptions) {
- o.threads = threads
- }
-}
-
-func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
- return func(o *ConfigLoaderOptions) {
- o.ctxSize = ctxSize
- }
-}
-
-func LoadOptionF16(f16 bool) ConfigLoaderOption {
- return func(o *ConfigLoaderOptions) {
- o.f16 = f16
- }
-}
-
-type ConfigLoaderOption func(*ConfigLoaderOptions)
-
-func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) {
- for _, l := range options {
- l(lo)
- }
-}
-
-func NewBackendConfigLoader() *BackendConfigLoader {
- return &BackendConfigLoader{
- configs: make(map[string]BackendConfig),
- }
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
- bcl.Lock()
- defer bcl.Unlock()
- c, err := readBackendConfig(file, opts...)
- if err != nil {
- return fmt.Errorf("cannot read config file: %w", err)
- }
-
- bcl.configs[c.Name] = *c
- return nil
-}
-
-func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
- bcl.Lock()
- defer bcl.Unlock()
- v, exists := bcl.configs[m]
- return v, exists
-}
-
-func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
- bcl.Lock()
- defer bcl.Unlock()
- var res []BackendConfig
- for _, v := range bcl.configs {
- res = append(res, v)
- }
- sort.SliceStable(res, func(i, j int) bool {
- return res[i].Name < res[j].Name
- })
- return res
-}
-
-func (bcl *BackendConfigLoader) ListBackendConfigs() []string {
- bcl.Lock()
- defer bcl.Unlock()
- var res []string
- for k := range bcl.configs {
- res = append(res, k)
- }
- return res
-}
-
-// Preload prepare models if they are not local but url or huggingface repositories
-func (bcl *BackendConfigLoader) Preload(modelPath string) error {
- bcl.Lock()
- defer bcl.Unlock()
-
- status := func(fileName, current, total string, percent float64) {
- utils.DisplayDownloadFunction(fileName, current, total, percent)
- }
-
- log.Info().Msgf("Preloading models from %s", modelPath)
-
- renderMode := "dark"
- if os.Getenv("COLOR") != "" {
- renderMode = os.Getenv("COLOR")
- }
-
- glamText := func(t string) {
- out, err := glamour.Render(t, renderMode)
- if err == nil && os.Getenv("NO_COLOR") == "" {
- fmt.Println(out)
- } else {
- fmt.Println(t)
- }
- }
-
- for i, config := range bcl.configs {
-
- // Download files and verify their SHA
- for _, file := range config.DownloadFiles {
- log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
-
- if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
- return err
- }
- // Create file path
- filePath := filepath.Join(modelPath, file.Filename)
-
- if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
- return err
- }
- }
-
- modelURL := config.PredictionOptions.Model
- modelURL = downloader.ConvertURL(modelURL)
-
- if downloader.LooksLikeURL(modelURL) {
- // md5 of model name
- md5Name := utils.MD5(modelURL)
-
- // check if file exists
- if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
- if err != nil {
- return err
- }
- }
-
- cc := bcl.configs[i]
- c := &cc
- c.PredictionOptions.Model = md5Name
- bcl.configs[i] = *c
- }
- if bcl.configs[i].Name != "" {
- glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name))
- }
- if bcl.configs[i].Description != "" {
- //glamText("**Description**")
- glamText(bcl.configs[i].Description)
- }
- if bcl.configs[i].Usage != "" {
- //glamText("**Usage**")
- glamText(bcl.configs[i].Usage)
- }
- }
- return nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
- bcl.Lock()
- defer bcl.Unlock()
- entries, err := os.ReadDir(path)
- if err != nil {
- return err
- }
- files := make([]fs.FileInfo, 0, len(entries))
- for _, entry := range entries {
- info, err := entry.Info()
- if err != nil {
- return err
- }
- files = append(files, info)
- }
- for _, file := range files {
- // Skip templates, YAML and .keep files
- if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
- continue
- }
- c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...)
- if err == nil {
- bcl.configs[c.Name] = *c
- }
- }
-
- return nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
- bcl.Lock()
- defer bcl.Unlock()
- c, err := readBackendConfigFile(file, opts...)
- if err != nil {
- return fmt.Errorf("cannot load config file: %w", err)
- }
-
- for _, cc := range c {
- bcl.configs[cc.Name] = *cc
- }
- return nil
-}
-
-//////////
-
-// Load a config file for a model
-func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-
- // Load a config file if present after the model name
- cfg := &BackendConfig{
- PredictionOptions: schema.PredictionOptions{
- Model: modelName,
- },
- }
-
- cfgExisting, exists := bcl.GetBackendConfig(modelName)
- if exists {
- cfg = &cfgExisting
- } else {
- // Load a config file if present after the model name
- modelConfig := filepath.Join(modelPath, modelName+".yaml")
- if _, err := os.Stat(modelConfig); err == nil {
- if err := bcl.LoadBackendConfig(modelConfig); err != nil {
- return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
- }
- cfgExisting, exists = bcl.GetBackendConfig(modelName)
- if exists {
- cfg = &cfgExisting
- }
- }
- }
-
- cfg.SetDefaults(opts...)
- return cfg, nil
-}
-
-func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
- c := &[]*BackendConfig{}
- f, err := os.ReadFile(file)
- if err != nil {
- return nil, fmt.Errorf("cannot read config file: %w", err)
- }
- if err := yaml.Unmarshal(f, c); err != nil {
- return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
- }
-
- for _, cc := range *c {
- cc.SetDefaults(opts...)
- }
-
- return *c, nil
-}
-
-func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
- c := &BackendConfig{}
- f, err := os.ReadFile(file)
- if err != nil {
- return nil, fmt.Errorf("cannot read config file: %w", err)
- }
- if err := yaml.Unmarshal(f, c); err != nil {
- return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
- }
-
- c.SetDefaults(opts...)
- return c, nil
-}
-
-func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) {
- cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
- LoadOptionContextSize(appConfig.ContextSize),
- LoadOptionDebug(appConfig.Debug),
- LoadOptionF16(appConfig.F16),
- LoadOptionThreads(appConfig.Threads),
- )
-
- // Set the parameters for the language model prediction
- updateBackendConfigFromOpenAIRequest(cfg, input)
-
- return cfg, input, err
-}
-
-func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) {
- if request.Echo {
- bc.Echo = request.Echo
- }
- if request.TopK != nil && *request.TopK != 0 {
- bc.TopK = request.TopK
- }
- if request.TopP != nil && *request.TopP != 0 {
- bc.TopP = request.TopP
- }
-
- if request.Backend != "" {
- bc.Backend = request.Backend
- }
-
- if request.ClipSkip != 0 {
- bc.Diffusers.ClipSkip = request.ClipSkip
- }
-
- if request.ModelBaseName != "" {
- bc.AutoGPTQ.ModelBaseName = request.ModelBaseName
- }
-
- if request.NegativePromptScale != 0 {
- bc.NegativePromptScale = request.NegativePromptScale
- }
-
- if request.UseFastTokenizer {
- bc.UseFastTokenizer = request.UseFastTokenizer
- }
-
- if request.NegativePrompt != "" {
- bc.NegativePrompt = request.NegativePrompt
- }
-
- if request.RopeFreqBase != 0 {
- bc.RopeFreqBase = request.RopeFreqBase
- }
-
- if request.RopeFreqScale != 0 {
- bc.RopeFreqScale = request.RopeFreqScale
- }
-
- if request.Grammar != "" {
- bc.Grammar = request.Grammar
- }
-
- if request.Temperature != nil && *request.Temperature != 0 {
- bc.Temperature = request.Temperature
- }
-
- if request.Maxtokens != nil && *request.Maxtokens != 0 {
- bc.Maxtokens = request.Maxtokens
- }
-
- switch stop := request.Stop.(type) {
- case string:
- if stop != "" {
- bc.StopWords = append(bc.StopWords, stop)
- }
- case []interface{}:
- for _, pp := range stop {
- if s, ok := pp.(string); ok {
- bc.StopWords = append(bc.StopWords, s)
- }
- }
- }
-
- if len(request.Tools) > 0 {
- for _, tool := range request.Tools {
- request.Functions = append(request.Functions, tool.Function)
- }
- }
-
- if request.ToolsChoice != nil {
- var toolChoice grammar.Tool
- switch content := request.ToolsChoice.(type) {
- case string:
- _ = json.Unmarshal([]byte(content), &toolChoice)
- case map[string]interface{}:
- dat, _ := json.Marshal(content)
- _ = json.Unmarshal(dat, &toolChoice)
- }
- request.FunctionCall = map[string]interface{}{
- "name": toolChoice.Function.Name,
- }
- }
-
- // Decode each request's message content
- index := 0
- for i, m := range request.Messages {
- switch content := m.Content.(type) {
- case string:
- request.Messages[i].StringContent = content
- case []interface{}:
- dat, _ := json.Marshal(content)
- c := []schema.Content{}
- json.Unmarshal(dat, &c)
- for _, pp := range c {
- if pp.Type == "text" {
- request.Messages[i].StringContent = pp.Text
- } else if pp.Type == "image_url" {
- // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
- base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
- if err == nil {
- request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
- // set a placeholder for each image
- request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent
- index++
- } else {
- fmt.Print("Failed encoding image", err)
- }
- }
- }
- }
- }
-
- if request.RepeatPenalty != 0 {
- bc.RepeatPenalty = request.RepeatPenalty
- }
-
- if request.FrequencyPenalty != 0 {
- bc.FrequencyPenalty = request.FrequencyPenalty
- }
-
- if request.PresencePenalty != 0 {
- bc.PresencePenalty = request.PresencePenalty
- }
-
- if request.Keep != 0 {
- bc.Keep = request.Keep
- }
-
- if request.Batch != 0 {
- bc.Batch = request.Batch
- }
-
- if request.IgnoreEOS {
- bc.IgnoreEOS = request.IgnoreEOS
- }
-
- if request.Seed != nil {
- bc.Seed = request.Seed
- }
-
- if request.TypicalP != nil {
- bc.TypicalP = request.TypicalP
- }
-
- switch inputs := request.Input.(type) {
- case string:
- if inputs != "" {
- bc.InputStrings = append(bc.InputStrings, inputs)
- }
- case []interface{}:
- for _, pp := range inputs {
- switch i := pp.(type) {
- case string:
- bc.InputStrings = append(bc.InputStrings, i)
- case []interface{}:
- tokens := []int{}
- for _, ii := range i {
- tokens = append(tokens, int(ii.(float64)))
- }
- bc.InputToken = append(bc.InputToken, tokens)
- }
- }
- }
-
- // Can be either a string or an object
- switch fnc := request.FunctionCall.(type) {
- case string:
- if fnc != "" {
- bc.SetFunctionCallString(fnc)
- }
- case map[string]interface{}:
- var name string
- n, exists := fnc["name"]
- if exists {
- nn, e := n.(string)
- if e {
- name = nn
- }
- }
- bc.SetFunctionCallNameString(name)
- }
-
- switch p := request.Prompt.(type) {
- case string:
- bc.PromptStrings = append(bc.PromptStrings, p)
- case []interface{}:
- for _, pp := range p {
- if s, ok := pp.(string); ok {
- bc.PromptStrings = append(bc.PromptStrings, s)
- }
- }
- }
-}
diff --git a/core/config/exports_test.go b/core/config/exports_test.go
deleted file mode 100644
index 70ba84e6..00000000
--- a/core/config/exports_test.go
+++ /dev/null
@@ -1,6 +0,0 @@
-package config
-
-// This file re-exports private functions to be used directly in unit tests.
-// Since this file's name ends in _test.go, theoretically these should not be exposed past the tests.
-
-var ReadBackendConfigFile = readBackendConfigFile
diff --git a/core/http/api.go b/core/http/api.go
index 7094899a..af38512a 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -1,20 +1,23 @@
package http
import (
+ "encoding/json"
"errors"
+ "os"
"strings"
- "github.com/go-skynet/LocalAI/core"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/swagger" // swagger handler
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+
+ "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
@@ -52,12 +55,13 @@ func readAuthHeader(c *fiber.Ctx) string {
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization
-func App(application *core.Application) (*fiber.App, error) {
+
+func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
Views: renderEngine(),
- BodyLimit: application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
- DisableStartupMessage: application.ApplicationConfig.DisableMessage,
+ BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+ DisableStartupMessage: appConfig.DisableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -78,7 +82,7 @@ func App(application *core.Application) (*fiber.App, error) {
},
})
- if application.ApplicationConfig.Debug {
+ if appConfig.Debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
@@ -86,7 +90,7 @@ func App(application *core.Application) (*fiber.App, error) {
// Default middleware config
- if !application.ApplicationConfig.Debug {
+ if !appConfig.Debug {
app.Use(recover.New())
}
@@ -104,7 +108,25 @@ func App(application *core.Application) (*fiber.App, error) {
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
- if len(application.ApplicationConfig.ApiKeys) == 0 {
+ if len(appConfig.ApiKeys) == 0 {
+ return c.Next()
+ }
+
+ // Check for api_keys.json file
+ fileContent, err := os.ReadFile("api_keys.json")
+ if err == nil {
+ // Parse JSON content from the file
+ var fileKeys []string
+ err := json.Unmarshal(fileContent, &fileKeys)
+ if err != nil {
+ return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
+ }
+
+ // Add file keys to appConfig.ApiKeys
+ appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
+ }
+
+ if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
@@ -120,7 +142,7 @@ func App(application *core.Application) (*fiber.App, error) {
}
apiKey := authHeaderParts[1]
- for _, key := range application.ApplicationConfig.ApiKeys {
+ for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
@@ -129,22 +151,20 @@ func App(application *core.Application) (*fiber.App, error) {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
- if application.ApplicationConfig.CORS {
+ if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
- if application.ApplicationConfig.CORSAllowOrigins == "" {
+ if appConfig.CORSAllowOrigins == "" {
c = cors.New()
} else {
- c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins})
+ c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
}
app.Use(c)
}
- fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig)
-
// LocalAI API endpoints
- galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath)
- galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader)
+ galleryService := services.NewGalleryService(appConfig.ModelPath)
+ galleryService.Start(appConfig.Context, cl)
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
@@ -152,17 +172,29 @@ func App(application *core.Application) (*fiber.App, error) {
}{Version: internal.PrintableVersion()})
})
+ // Make sure directories exist
+ os.MkdirAll(appConfig.ImageDir, 0755)
+ os.MkdirAll(appConfig.AudioDir, 0755)
+ os.MkdirAll(appConfig.UploadDir, 0755)
+ os.MkdirAll(appConfig.ConfigsDir, 0755)
+ os.MkdirAll(appConfig.ModelPath, 0755)
+
+ // Load config jsons
+ utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
+ utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
+ utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+
app.Get("/swagger/*", swagger.HandlerDefault) // default
welcomeRoute(
app,
- application.BackendConfigLoader,
- application.ModelLoader,
- application.ApplicationConfig,
+ cl,
+ ml,
+ appConfig,
auth,
)
- modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService)
+ modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
@@ -171,85 +203,83 @@ func App(application *core.Application) (*fiber.App, error) {
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
- // Stores
- storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now.
- app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig))
- app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig))
- app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig))
- app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig))
-
- // openAI compatible API endpoints
-
- // chat
- app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService))
- app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService))
-
- // edit
- app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService))
- app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService))
-
- // assistant
- // TODO: Refactor this to the new style eventually
- app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
- app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig))
-
- // files
- app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
- app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig))
-
- // completion
- app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
- app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
- app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService))
-
- // embeddings
- app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
- app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
- app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService))
-
- // audio
- app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService))
- app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
-
- // images
- app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService))
+ app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
// Elevenlabs
- app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+ app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
- // LocalAI TTS?
- app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+ // Stores
+ sl := model.NewModelLoader("")
+ app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
+ app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
+ app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
+ app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
- if application.ApplicationConfig.ImageDir != "" {
- app.Static("/generated-images", application.ApplicationConfig.ImageDir)
+ // openAI compatible API endpoints
+
+ // chat
+ app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+ app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+
+ // edit
+ app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+ app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+
+ // assistant
+ app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+
+ // files
+ app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+ app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+ app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+ app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+ app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+ app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+ app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+
+ // completion
+ app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+ app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+ app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+
+ // embeddings
+ app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+ app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+
+ // audio
+ app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
+ app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
+
+ // images
+ app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
+
+ if appConfig.ImageDir != "" {
+ app.Static("/generated-images", appConfig.ImageDir)
}
- if application.ApplicationConfig.AudioDir != "" {
- app.Static("/generated-audio", application.ApplicationConfig.AudioDir)
+ if appConfig.AudioDir != "" {
+ app.Static("/generated-audio", appConfig.AudioDir)
}
ok := func(c *fiber.Ctx) error {
@@ -261,12 +291,13 @@ func App(application *core.Application) (*fiber.App, error) {
app.Get("/readyz", ok)
// Experimental Backend Statistics Module
- app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService))
- app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService))
+ backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
+ app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
+ app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
// models
- app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService))
- app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService))
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
+ app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
diff --git a/core/http/api_test.go b/core/http/api_test.go
index bf8feb1c..1553ed21 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -12,9 +12,7 @@ import (
"os"
"path/filepath"
"runtime"
- "strings"
- "github.com/go-skynet/LocalAI/core"
"github.com/go-skynet/LocalAI/core/config"
. "github.com/go-skynet/LocalAI/core/http"
"github.com/go-skynet/LocalAI/core/schema"
@@ -207,7 +205,9 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
- var application *core.Application
+ var bcl *config.BackendConfigLoader
+ var ml *model.ModelLoader
+ var applicationConfig *config.ApplicationConfig
commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -252,7 +252,7 @@ var _ = Describe("API test", func() {
},
}
- application, err = startup.Startup(
+ bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -261,7 +261,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())
- app, err = App(application)
+ app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -474,11 +474,11 @@ var _ = Describe("API test", func() {
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
- Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil())
- Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name)
+ Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
+ Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
- err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res)
+ err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
@@ -487,9 +487,9 @@ var _ = Describe("API test", func() {
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
- // if runtime.GOOS != "linux" {
- // Skip("test supported only on linux")
- // }
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
modelName := "codellama"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
@@ -504,7 +504,7 @@ var _ = Describe("API test", func() {
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
- }, "480s", "10s").Should(Equal(true))
+ }, "360s", "10s").Should(Equal(true))
By("testing chat")
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
@@ -551,13 +551,11 @@ var _ = Describe("API test", func() {
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
- fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message)
- Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil())
- Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil())
- Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name)
+ Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
+ Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
- err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res)
+ err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
@@ -611,7 +609,7 @@ var _ = Describe("API test", func() {
},
}
- application, err = startup.Startup(
+ bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithAudioDir(tmpdir),
@@ -622,7 +620,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
- app, err = App(application)
+ app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -726,14 +724,14 @@ var _ = Describe("API test", func() {
var err error
- application, err = startup.Startup(
+ bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
- app, err = App(application)
+ app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -763,11 +761,6 @@ var _ = Describe("API test", func() {
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -775,11 +768,6 @@ var _ = Describe("API test", func() {
})
It("can generate chat completions via ggml", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -787,11 +775,6 @@ var _ = Describe("API test", func() {
})
It("can generate completions from model configs", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -799,11 +782,6 @@ var _ = Describe("API test", func() {
})
It("can generate chat completions from model configs", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
@@ -890,9 +868,9 @@ var _ = Describe("API test", func() {
Context("backends", func() {
It("runs rwkv completion", func() {
- // if runtime.GOOS != "linux" {
- // Skip("test supported only on linux")
- // }
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
@@ -913,20 +891,17 @@ var _ = Describe("API test", func() {
}
Expect(err).ToNot(HaveOccurred())
-
- if len(response.Choices) > 0 {
- text += response.Choices[0].Text
- tokens++
- }
+ text += response.Choices[0].Text
+ tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(ContainSubstring("five"))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
It("runs rwkv chat completion", func() {
- // if runtime.GOOS != "linux" {
- // Skip("test supported only on linux")
- // }
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
@@ -1035,14 +1010,14 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())
var err error
- application, err = startup.Startup(
+ bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
- app, err = App(application)
+ app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -1066,33 +1041,18 @@ var _ = Describe("API test", func() {
}
})
It("can generate chat completions from config file (list1)", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file (list2)", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
- bt, ok := os.LookupEnv("BUILD_TYPE")
- if ok && strings.ToLower(bt) == "metal" {
- Skip("GGML + Metal is known flaky, skip test temporarily")
- }
-
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go
index 99fbcde9..ffb63111 100644
--- a/core/http/ctx/fiber.go
+++ b/core/http/ctx/fiber.go
@@ -1,88 +1,43 @@
package fiberContext
import (
- "context"
- "encoding/json"
"fmt"
"strings"
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
-type FiberContextExtractor struct {
- ml *model.ModelLoader
- appConfig *config.ApplicationConfig
-}
-
-func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor {
- return &FiberContextExtractor{
- ml: ml,
- appConfig: appConfig,
- }
-}
-
// ModelFromContext returns the model from the context
// If no model is specified, it will take the first available
// Takes a model string as input which should be the one received from the user request.
// It returns the model name resolved from the context and an error if any.
-func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) {
- ctxPM := ctx.Params("model")
- if ctxPM != "" {
- log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM)
- modelInput = ctxPM
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+ if ctx.Params("model") != "" {
+ modelInput = ctx.Params("model")
}
// Set model from bearer token, if available
- bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ")
- bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer)
+ bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ")
+ bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
- models, _ := fce.ml.ListModels()
+ models, _ := loader.ListModels()
if len(models) > 0 {
modelInput = models[0]
- log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput)
+ log.Debug().Msgf("No model specified, using: %s", modelInput)
} else {
- log.Warn().Msgf("[FCE] No model specified, none available")
- return "", fmt.Errorf("[fce] no model specified, none available")
+ log.Debug().Msgf("No model specified, returning error")
+ return "", fmt.Errorf("no model specified")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
- log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer)
+ log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelInput = bearer
}
-
- if modelInput == "" {
- log.Warn().Msg("[FCE] modelInput is empty")
- }
return modelInput, nil
}
-
-// TODO: Do we still need the first return value?
-func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) {
- input := new(schema.OpenAIRequest)
-
- // Get input data from the request body
- if err := c.BodyParser(input); err != nil {
- return "", nil, fmt.Errorf("failed parsing request body: %w", err)
- }
-
- received, _ := json.Marshal(input)
-
- ctx, cancel := context.WithCancel(fce.appConfig.Context)
- input.Context = ctx
- input.Cancel = cancel
-
- log.Debug().Msgf("Request received: %s", string(received))
-
- var err error
- input.Model, err = fce.ModelFromContext(c, input.Model, firstModel)
-
- return input.Model, input, err
-}
diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go
index 4f5db463..841f9b5f 100644
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -2,7 +2,9 @@ package elevenlabs
import (
"github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
@@ -15,7 +17,7 @@ import (
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/text-to-speech/{voice-id} [post]
-func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
+func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsTTSRequest)
@@ -26,21 +28,34 @@ func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToS
return err
}
- var err error
- input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false)
+ modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false)
if err != nil {
+ modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
}
- responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{
- Model: input.ModelID,
- Voice: voiceID,
- Input: input.Text,
- })
- rawValue := <-responseChannel
- if rawValue.Error != nil {
- return rawValue.Error
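+ // Resolve the backend config for the requested model; if the lookup
+ // fails we fall back to the raw model ID below.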
+ cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+ config.LoadOptionDebug(appConfig.Debug),
+ config.LoadOptionThreads(appConfig.Threads),
+ config.LoadOptionContextSize(appConfig.ContextSize),
+ config.LoadOptionF16(appConfig.F16),
+ )
+ if err != nil {
+ modelFile = input.ModelID
+ log.Warn().Msgf("Model not found in context: %s", input.ModelID)
+ } else {
+ if input.ModelID != "" {
+ modelFile = input.ModelID
+ } else {
+ modelFile = cfg.Model
+ }
}
- return c.Download(*rawValue.Value)
+ log.Debug().Msgf("Request for model: %s", modelFile)
+
+ filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg)
+ if err != nil {
+ return err
+ }
+ return c.Download(filePath)
}
}
diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go
index dac20388..8c7a664a 100644
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -6,7 +6,7 @@ import (
"github.com/gofiber/fiber/v2"
)
-func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
+func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest)
@@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
}
}
-func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
+func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest)
// Get input data from the request body
diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go
index df7841fb..7822e024 100644
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -2,7 +2,9 @@ package localai
import (
"github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
@@ -14,26 +16,45 @@ import (
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/audio/speech [post]
-func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
+func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- var err error
+
input := new(schema.TTSRequest)
// Get input data from the request body
- if err = c.BodyParser(input); err != nil {
+ if err := c.BodyParser(input); err != nil {
return err
}
- input.Model, err = fce.ModelFromContext(c, input.Model, false)
+ modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
if err != nil {
+ modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
- responseChannel := ttsbs.TextToAudioFile(input)
- rawValue := <-responseChannel
- if rawValue.Error != nil {
- return rawValue.Error
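+ // Load the backend config for the resolved model, inheriting global
+ // defaults (threads, context size, f16) from the application config.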
+ cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+ config.LoadOptionDebug(appConfig.Debug),
+ config.LoadOptionThreads(appConfig.Threads),
+ config.LoadOptionContextSize(appConfig.ContextSize),
+ config.LoadOptionF16(appConfig.F16),
+ )
+
+ if err != nil {
+ modelFile = input.Model
+ log.Warn().Msgf("Model not found in context: %s", input.Model)
+ } else {
+ modelFile = cfg.Model
}
- return c.Download(*rawValue.Value)
+ log.Debug().Msgf("Request for model: %s", modelFile)
+
+ if input.Backend != "" {
+ cfg.Backend = input.Backend
+ }
+
+ filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg)
+ if err != nil {
+ return err
+ }
+ return c.Download(filePath)
}
}
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index 72cb8b4a..dceb3789 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
}
}
- return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID))
+ return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID))
}
}
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index a240b024..36d1142b 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -5,11 +5,17 @@ import (
"bytes"
"encoding/json"
"fmt"
+ "strings"
+ "time"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/pkg/grammar"
+ model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -19,82 +25,412 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
-func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
+func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ emptyMessage := ""
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+
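+ // process streams a plain chat completion: it first emits a chunk carrying
+ // the assistant role, then one chunk per token produced by the model.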
+ process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
+ initialMessage := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
+ Object: "chat.completion.chunk",
+ }
+ responses <- initialMessage
+
+ ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
+ resp := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
+ Object: "chat.completion.chunk",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: usage.Prompt,
+ CompletionTokens: usage.Completion,
+ TotalTokens: usage.Prompt + usage.Completion,
+ },
+ }
+
+ responses <- resp
+ return true
+ })
+ close(responses)
+ }
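+ // processTools handles function/tool calls: the whole grammar-constrained
+ // output is collected first, parsed into calls, and re-emitted either as a
+ // plain reply (no-action case) or as tool_call delta chunks.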
+ processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
+ result := ""
+ _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
+ result += s
+ // TODO: Change generated BNF grammar to be compliant with the schema so we can
+ // stream the result token by token here.
+ return true
+ })
+
+ results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
+ noActionToRun := len(results) > 0 && results[0].name == noAction
+
+ switch {
+ case noActionToRun:
+ initialMessage := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
+ Object: "chat.completion.chunk",
+ }
+ responses <- initialMessage
+
+ result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
+ if err != nil {
+ log.Error().Err(err).Msg("error handling question")
+ return
+ }
+
+ resp := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+ Object: "chat.completion.chunk",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: tokenUsage.Prompt,
+ CompletionTokens: tokenUsage.Completion,
+ TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
+ },
+ }
+
+ responses <- resp
+
+ default:
+ for i, ss := range results {
+ name, args := ss.name, ss.arguments
+
+ initialMessage := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{
+ Delta: &schema.Message{
+ Role: "assistant",
+ ToolCalls: []schema.ToolCall{
+ {
+ Index: i,
+ ID: id,
+ Type: "function",
+ FunctionCall: schema.FunctionCall{
+ Name: name,
+ },
+ },
+ },
+ }}},
+ Object: "chat.completion.chunk",
+ }
+ responses <- initialMessage
+
+ responses <- schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{{
+ Delta: &schema.Message{
+ Role: "assistant",
+ ToolCalls: []schema.ToolCall{
+ {
+ Index: i,
+ ID: id,
+ Type: "function",
+ FunctionCall: schema.FunctionCall{
+ Arguments: args,
+ },
+ },
+ },
+ }}},
+ Object: "chat.completion.chunk",
+ }
+ }
+ }
+
+ close(responses)
+ }
+
return func(c *fiber.Ctx) error {
- _, request, err := fce.OpenAIRequestFromContext(c, false)
+ processFunctions := false
+ funcs := grammar.Functions{}
+ modelFile, input, err := readRequest(c, ml, startupOptions, true)
if err != nil {
- return fmt.Errorf("failed reading parameters from request: %w", err)
+ return fmt.Errorf("failed reading parameters from request:%w", err)
}
- traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream)
+ config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
if err != nil {
- return err
+ return fmt.Errorf("failed reading parameters from request:%w", err)
+ }
+ log.Debug().Msgf("Configuration read: %+v", config)
+
+ // Allow the user to set custom actions via config file
+ // to be "embedded" in each model
+ noActionName := "answer"
+ noActionDescription := "use this action to answer without performing any action"
+
+ if config.FunctionsConfig.NoActionFunctionName != "" {
+ noActionName = config.FunctionsConfig.NoActionFunctionName
+ }
+ if config.FunctionsConfig.NoActionDescriptionName != "" {
+ noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}
- if request.Stream {
+ if input.ResponseFormat.Type == "json_object" {
+ input.Grammar = grammar.JSONBNF
+ }
- log.Debug().Msgf("Chat Stream request received")
+ config.Grammar = input.Grammar
+ // process functions if we have any defined or if we have a function call string
+ if len(input.Functions) > 0 && config.ShouldUseFunctions() {
+ log.Debug().Msgf("Response needs to process functions")
+
+ processFunctions = true
+
+ noActionGrammar := grammar.Function{
+ Name: noActionName,
+ Description: noActionDescription,
+ Parameters: map[string]interface{}{
+ "properties": map[string]interface{}{
+ "message": map[string]interface{}{
+ "type": "string",
+ "description": "The message to reply the user with",
+ }},
+ },
+ }
+
+ // Append the no action function
+ funcs = append(funcs, input.Functions...)
+ if !config.FunctionsConfig.DisableNoAction {
+ funcs = append(funcs, noActionGrammar)
+ }
+
+ // Force picking one of the functions by the request
+ if config.FunctionToCall() != "" {
+ funcs = funcs.Select(config.FunctionToCall())
+ }
+
+ // Update input grammar
+ jsStruct := funcs.ToJSONStructure()
+ config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
+ } else if input.JSONFunctionGrammarObject != nil {
+ config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
+ }
+
+ // functions are handled in stream mode too: processTools collects the whole output before emitting tool-call chunks (no token-by-token streaming yet)
+ toStream := input.Stream
+
+ log.Debug().Msgf("Parameters: %+v", config)
+
+ var predInput string
+
+ // If we are using the tokenizer template, we don't need to process the messages
+ // unless we are processing functions
+ if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
+
+ suppressConfigSystemPrompt := false
+ mess := []string{}
+ for messageIndex, i := range input.Messages {
+ var content string
+ role := i.Role
+
+ // if this is a function call, we might want to customize the role so we can better display that the "assistant called a json action"
+ // if an "assistant_function_call" role is defined we use it, otherwise we use the role passed in the request
+ if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
+ roleFn := "assistant_function_call"
+ r := config.Roles[roleFn]
+ if r != "" {
+ role = roleFn
+ }
+ }
+ r := config.Roles[role]
+ contentExists := i.Content != nil && i.StringContent != ""
+
+ fcall := i.FunctionCall
+ if len(i.ToolCalls) > 0 {
+ fcall = i.ToolCalls
+ }
+
+ // First attempt to populate content via a chat message specific template
+ if config.TemplateConfig.ChatMessage != "" {
+ chatMessageData := model.ChatMessageTemplateData{
+ SystemPrompt: config.SystemPrompt,
+ Role: r,
+ RoleName: role,
+ Content: i.StringContent,
+ FunctionCall: fcall,
+ FunctionName: i.Name,
+ LastMessage: messageIndex == (len(input.Messages) - 1),
+ Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
+ MessageIndex: messageIndex,
+ }
+ templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
+ if err != nil {
+ log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
+ } else {
+ if templatedChatMessage == "" {
+ log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
+ continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf fallback from templating a blank message
+ }
+ log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
+ content = templatedChatMessage
+ }
+ }
+
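+ // The two helpers below JSON-encode a function or tool call and append it
+ // to the message content, with or without the role prefix.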
+ marshalAnyRole := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + fmt.Sprint(r, " ", string(j))
+ } else {
+ content = fmt.Sprint(r, " ", string(j))
+ }
+ }
+ }
+ marshalAny := func(f any) {
+ j, err := json.Marshal(f)
+ if err == nil {
+ if contentExists {
+ content += "\n" + string(j)
+ } else {
+ content = string(j)
+ }
+ }
+ }
+ // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
+ if content == "" {
+ if r != "" {
+ if contentExists {
+ content = fmt.Sprint(r, i.StringContent)
+ }
+
+ if i.FunctionCall != nil {
+ marshalAnyRole(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAnyRole(i.ToolCalls)
+ }
+ } else {
+ if contentExists {
+ content = fmt.Sprint(i.StringContent)
+ }
+ if i.FunctionCall != nil {
+ marshalAny(i.FunctionCall)
+ }
+ if i.ToolCalls != nil {
+ marshalAny(i.ToolCalls)
+ }
+ }
+ // Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+ if contentExists && role == "system" {
+ suppressConfigSystemPrompt = true
+ }
+ }
+
+ mess = append(mess, content)
+ }
+
+ predInput = strings.Join(mess, "\n")
+ log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+ templateFile := ""
+
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+ templateFile = config.Model
+ }
+
+ if config.TemplateConfig.Chat != "" && !processFunctions {
+ templateFile = config.TemplateConfig.Chat
+ }
+
+ if config.TemplateConfig.Functions != "" && processFunctions {
+ templateFile = config.TemplateConfig.Functions
+ }
+
+ if templateFile != "" {
+ templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+ SystemPrompt: config.SystemPrompt,
+ SuppressSystemPrompt: suppressConfigSystemPrompt,
+ Input: predInput,
+ Functions: funcs,
+ })
+ if err == nil {
+ predInput = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", predInput)
+ } else {
+ log.Debug().Msgf("Template failed loading: %s", err.Error())
+ }
+ }
+
+ log.Debug().Msgf("Prompt (after templating): %s", predInput)
+ if processFunctions {
+ log.Debug().Msgf("Grammar: %+v", config.Grammar)
+ }
+ }
+
+ switch {
+ case toStream:
+
+ log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
- //
+ // c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
+ responses := make(chan schema.OpenAIResponse)
+
+ if !processFunctions {
+ go process(predInput, input, config, ml, responses)
+ } else {
+ go processTools(noActionName, predInput, input, config, ml, responses)
+ }
+
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
usage := &schema.OpenAIUsage{}
toolsCalled := false
- for ev := range tokenChannel {
- if ev.Error != nil {
- log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error")
- request.Cancel()
- break
- }
- usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
-
- if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 {
+ for ev := range responses {
+ usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
+ if len(ev.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
- if ev.Error != nil {
- log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error to debug during tokenChannel handler")
- enc.Encode(ev.Error)
- } else {
- enc.Encode(ev.Value)
- }
- log.Debug().Msgf("chat streaming sending chunk: %s", buf.String())
+ enc.Encode(ev)
+ log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
if err != nil {
- log.Debug().Err(err).Msgf("Sending chunk failed")
- request.Cancel()
- break
- }
- err = w.Flush()
- if err != nil {
- log.Debug().Msg("error while flushing, closing connection")
- request.Cancel()
+ log.Debug().Msgf("Sending chunk failed: %v", err)
+ input.Cancel()
break
}
+ w.Flush()
}
finishReason := "stop"
if toolsCalled {
finishReason = "tool_calls"
- } else if toolsCalled && len(request.Tools) == 0 {
+ } else if toolsCalled && len(input.Tools) == 0 {
finishReason = "function_call"
}
resp := &schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: finishReason,
Index: 0,
- Delta: &schema.Message{Content: ""},
+ Delta: &schema.Message{Content: &emptyMessage},
}},
Object: "chat.completion.chunk",
Usage: *usage,
@@ -105,21 +441,202 @@ func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAI
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
-
return nil
+
+ // no streaming mode
+ default:
+ result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
+ if !processFunctions {
+ // no function is called, just reply and use stop as finish reason
+ *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+ return
+ }
+
+ results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
+ noActionsToRun := len(results) > 0 && results[0].name == noActionName
+
+ switch {
+ case noActionsToRun:
+ result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
+ if err != nil {
+ log.Error().Err(err).Msg("error handling question")
+ return
+ }
+ *c = append(*c, schema.Choice{
+ Message: &schema.Message{Role: "assistant", Content: &result}})
+ default:
+ toolChoice := schema.Choice{
+ Message: &schema.Message{
+ Role: "assistant",
+ },
+ }
+
+ if len(input.Tools) > 0 {
+ toolChoice.FinishReason = "tool_calls"
+ }
+
+ for _, ss := range results {
+ name, args := ss.name, ss.arguments
+ if len(input.Tools) > 0 {
+ // If we are using tools, we condense the function calls into
+ // a single response choice with all the tools
+ toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
+ schema.ToolCall{
+ ID: id,
+ Type: "function",
+ FunctionCall: schema.FunctionCall{
+ Name: name,
+ Arguments: args,
+ },
+ },
+ )
+ } else {
+ // otherwise we return more choices directly
+ *c = append(*c, schema.Choice{
+ FinishReason: "function_call",
+ Message: &schema.Message{
+ Role: "assistant",
+ FunctionCall: map[string]interface{}{
+ "name": name,
+ "arguments": args,
+ },
+ },
+ })
+ }
+ }
+
+ if len(input.Tools) > 0 {
+ // we need to append our result if we are using tools
+ *c = append(*c, toolChoice)
+ }
+ }
+
+ }, nil)
+ if err != nil {
+ return err
+ }
+
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: result,
+ Object: "chat.completion",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: tokenUsage.Prompt,
+ CompletionTokens: tokenUsage.Completion,
+ TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
+ },
+ }
+ respData, _ := json.Marshal(resp)
+ log.Debug().Msgf("Response: %s", respData)
+
+ // Return the prediction in the response body
+ return c.JSON(resp)
}
- // TODO is this proper to have exclusive from Stream, or do we need to issue both responses?
- rawResponse := <-finalResultChannel
-
- if rawResponse.Error != nil {
- return rawResponse.Error
- }
-
- jsonResult, _ := json.Marshal(rawResponse.Value)
- log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response")
-
- // Return the prediction in the response body
- return c.JSON(rawResponse.Value)
}
}
+
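+// handleQuestion is used when the model picked the no-action function: it
+// reuses the "message" argument the LLM already produced when present, or
+// otherwise runs a second, grammar-free inference to compute a plain reply.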
+func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
+ log.Debug().Msgf("nothing to do, computing a reply")
+
+ // If there is a message that the LLM already sends as part of the JSON reply, use it
+ arguments := map[string]interface{}{}
+ json.Unmarshal([]byte(args), &arguments)
+ m, exists := arguments["message"]
+ if exists {
+ switch message := m.(type) {
+ case string:
+ if message != "" {
+ log.Debug().Msgf("Reply received from LLM: %s", message)
+ message = backend.Finetune(*config, prompt, message)
+ log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
+
+ return message, nil
+ }
+ }
+ }
+
+ log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
+ // Otherwise ask the LLM to understand the JSON output and the context, and return a message
+ // Note: This costs (in term of CPU/GPU) another computation
+ config.Grammar = ""
+ images := []string{}
+ for _, m := range input.Messages {
+ images = append(images, m.StringImages...)
+ }
+
+ predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
+ if err != nil {
+ log.Error().Err(err).Msg("model inference failed")
+ return "", err
+ }
+
+ prediction, err := predFunc()
+ if err != nil {
+ log.Error().Err(err).Msg("prediction failed")
+ return "", err
+ }
+ return backend.Finetune(*config, prompt, prediction.Response), nil
+}
+
+type funcCallResults struct {
+ name string
+ arguments string
+}
+
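+// parseFunctionCall unmarshals the grammar-constrained LLM output into
+// (name, arguments) pairs, re-marshalling arguments to the JSON string the
+// OpenAI API expects. With the default grammar the input typically looks
+// like: {"function": "get_current_weather", "arguments": {"location": "..."}}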
+func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
+ results := []funcCallResults{}
+
+ // TODO: use generics to avoid this code duplication
+ if multipleResults {
+ ss := []map[string]interface{}{}
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ for _, s := range ss {
+ func_name, ok := s["function"]
+ if !ok {
+ continue
+ }
+ args, ok := s["arguments"]
+ if !ok {
+ continue
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ continue
+ }
+ results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+ }
+ } else {
+ // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+ ss := map[string]interface{}{}
+ // This prevents newlines from breaking JSON parsing for clients
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ // The grammar defines the function name as "function", while OpenAI returns "name"
+ func_name, ok := ss["function"]
+ if !ok {
+ return results
+ }
+ // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+ args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+ if !ok {
+ return results
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ return results
+ }
+ results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+ }
+
+ return results
+}
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index d8b412a9..69923475 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -4,13 +4,18 @@ import (
"bufio"
"bytes"
"encoding/json"
+ "errors"
"fmt"
+ "time"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
- "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/grammar"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -20,50 +25,116 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]
-func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
+func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+
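+ // process streams one text_completion chunk per token produced by the model.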
+ process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
+ ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
+ resp := schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: []schema.Choice{
+ {
+ Index: 0,
+ Text: s,
+ },
+ },
+ Object: "text_completion",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: usage.Prompt,
+ CompletionTokens: usage.Completion,
+ TotalTokens: usage.Prompt + usage.Completion,
+ },
+ }
+ log.Debug().Msgf("Sending goroutine: %s", s)
+
+ responses <- resp
+ return true
+ })
+ close(responses)
+ }
+
return func(c *fiber.Ctx) error {
- _, request, err := fce.OpenAIRequestFromContext(c, false)
+ modelFile, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- log.Debug().Msgf("`OpenAIRequest`: %+v", request)
+ log.Debug().Msgf("`input`: %+v", input)
- traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream)
+ config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
- return err
+ return fmt.Errorf("failed reading parameters from request:%w", err)
}
- if request.Stream {
- log.Debug().Msgf("Completion Stream request received")
+ if input.ResponseFormat.Type == "json_object" {
+ input.Grammar = grammar.JSONBNF
+ }
+ config.Grammar = input.Grammar
+
+ log.Debug().Msgf("Parameter Config: %+v", config)
+
+ if input.Stream {
+ log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
//c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
+ }
+
+ templateFile := ""
+
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+ templateFile = config.Model
+ }
+
+ if config.TemplateConfig.Completion != "" {
+ templateFile = config.TemplateConfig.Completion
+ }
+
+ if input.Stream {
+ if len(config.PromptStrings) > 1 {
+ return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
+ }
+
+ predInput := config.PromptStrings[0]
+
+ if templateFile != "" {
+ templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
+ Input: predInput,
+ })
+ if err == nil {
+ predInput = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", predInput)
+ }
+ }
+
+ responses := make(chan schema.OpenAIResponse)
+
+ go process(predInput, input, config, ml, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
- for ev := range tokenChannel {
+
+ for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
- if ev.Error != nil {
- log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error)
- enc.Encode(ev.Error)
- } else {
- enc.Encode(ev.Value)
- }
+ enc.Encode(ev)
- log.Debug().Msgf("completion streaming sending chunk: %s", buf.String())
+ log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
resp := &schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
@@ -80,15 +151,55 @@ func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.
}))
return nil
}
- // TODO is this proper to have exclusive from Stream, or do we need to issue both responses?
- rawResponse := <-finalResultChannel
- if rawResponse.Error != nil {
- return rawResponse.Error
+
+ var result []schema.Choice
+
+ totalTokenUsage := backend.TokenUsage{}
+
+ for k, i := range config.PromptStrings {
+ if templateFile != "" {
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
+ SystemPrompt: config.SystemPrompt,
+ Input: i,
+ })
+ if err == nil {
+ i = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", i)
+ }
+ }
+
+ r, tokenUsage, err := ComputeChoices(
+ input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
+ *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
+ }, nil)
+ if err != nil {
+ return err
+ }
+
+ totalTokenUsage.Prompt += tokenUsage.Prompt
+ totalTokenUsage.Completion += tokenUsage.Completion
+
+ result = append(result, r...)
}
- jsonResult, _ := json.Marshal(rawResponse.Value)
+
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: result,
+ Object: "text_completion",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: totalTokenUsage.Prompt,
+ CompletionTokens: totalTokenUsage.Completion,
+ TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
+ },
+ }
+
+ jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(rawResponse.Value)
+ return c.JSON(resp)
}
}
diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go
index a33050dd..25497095 100644
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -3,36 +3,92 @@ package openai
import (
"encoding/json"
"fmt"
+ "time"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
- "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"
"github.com/rs/zerolog/log"
)
-func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
+func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- _, request, err := fce.OpenAIRequestFromContext(c, false)
+ modelFile, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream)
+ config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
- return err
+ return fmt.Errorf("failed reading parameters from request:%w", err)
}
- rawResponse := <-finalResultChannel
- if rawResponse.Error != nil {
- return rawResponse.Error
+ log.Debug().Msgf("Parameter Config: %+v", config)
+
+ templateFile := ""
+
+ // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+ if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+ templateFile = config.Model
}
- jsonResult, _ := json.Marshal(rawResponse.Value)
+ if config.TemplateConfig.Edit != "" {
+ templateFile = config.TemplateConfig.Edit
+ }
+
+ var result []schema.Choice
+ totalTokenUsage := backend.TokenUsage{}
+
+ for _, i := range config.InputStrings {
+ if templateFile != "" {
+ templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
+ Input: i,
+ Instruction: input.Instruction,
+ SystemPrompt: config.SystemPrompt,
+ })
+ if err == nil {
+ i = templatedInput
+ log.Debug().Msgf("Template found, input modified to: %s", i)
+ }
+ }
+
+ r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
+ *c = append(*c, schema.Choice{Text: s})
+ }, nil)
+ if err != nil {
+ return err
+ }
+
+ totalTokenUsage.Prompt += tokenUsage.Prompt
+ totalTokenUsage.Completion += tokenUsage.Completion
+
+ result = append(result, r...)
+ }
+
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Choices: result,
+ Object: "edit",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: totalTokenUsage.Prompt,
+ CompletionTokens: totalTokenUsage.Completion,
+ TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
+ },
+ }
+
+ jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(rawResponse.Value)
+ return c.JSON(resp)
}
}
diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index be546991..eca34f79 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -3,9 +3,14 @@ package openai
import (
"encoding/json"
"fmt"
+ "time"
"github.com/go-skynet/LocalAI/core/backend"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/pkg/model"
+
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/google/uuid"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -16,25 +21,63 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/embeddings [post]
-func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error {
+func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- _, input, err := fce.OpenAIRequestFromContext(c, true)
+ model, input, err := readRequest(c, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- responseChannel := ebs.Embeddings(input)
-
- rawResponse := <-responseChannel
-
- if rawResponse.Error != nil {
- return rawResponse.Error
+ config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
+ if err != nil {
+ return fmt.Errorf("failed reading parameters from request:%w", err)
}
- jsonResult, _ := json.Marshal(rawResponse.Value)
+ log.Debug().Msgf("Parameter Config: %+v", config)
+ items := []schema.Item{}
+
+ for i, s := range config.InputToken {
+ // get the model function to call for the result
+ embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig)
+ if err != nil {
+ return err
+ }
+
+ embeddings, err := embedFn()
+ if err != nil {
+ return err
+ }
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ }
+
+ for i, s := range config.InputStrings {
+ // get the model function to call for the result
+ embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig)
+ if err != nil {
+ return err
+ }
+
+ embeddings, err := embedFn()
+ if err != nil {
+ return err
+ }
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ }
+
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Data: items,
+ Object: "list",
+ }
+
+ jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
- return c.JSON(rawResponse.Value)
+ return c.JSON(resp)
}
}
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index ec3d84da..9e806b3e 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -1,18 +1,50 @@
package openai
import (
+ "bufio"
+ "encoding/base64"
"encoding/json"
"fmt"
+ "io"
+ "net/http"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/google/uuid"
"github.com/go-skynet/LocalAI/core/backend"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
-// https://platform.openai.com/docs/api-reference/images/create
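+// downloadFile fetches the given URL into a temporary file and returns its
+// path; the caller is expected to remove the file when done.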
+func downloadFile(url string) (string, error) {
+ // Get the data
+ resp, err := http.Get(url)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ // Create the file
+ out, err := os.CreateTemp("", "image")
+ if err != nil {
+ return "", err
+ }
+ defer out.Close()
+
+ // Write the body to file
+ _, err = io.Copy(out, resp.Body)
+ return out.Name(), err
+}
+
+//
/*
*
@@ -27,36 +59,186 @@ import (
*
*/
-
// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
// @Summary Creates an image given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/images/generations [post]
-func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error {
+func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- // TODO: Somewhat a hack. Is there a better place to assign this?
- if igbs.BaseUrlForGeneratedImages == "" {
- igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/"
- }
- _, request, err := fce.OpenAIRequestFromContext(c, false)
+ m, input, err := readRequest(c, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- responseChannel := igbs.GenerateImage(request)
- rawResponse := <-responseChannel
-
- if rawResponse.Error != nil {
- return rawResponse.Error
+ if m == "" {
+ m = model.StableDiffusionBackend
}
+ log.Debug().Msgf("Loading model: %+v", m)
- jsonResult, err := json.Marshal(rawResponse.Value)
+ config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
if err != nil {
- return err
+ return fmt.Errorf("failed reading parameters from request:%w", err)
}
+
+ src := ""
+ if input.File != "" {
+
+ fileData := []byte{}
+ // check if input.File is an URL, if so download it and save it
+ // to a temporary file
+ if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
+ out, err := downloadFile(input.File)
+ if err != nil {
+ return fmt.Errorf("failed downloading file:%w", err)
+ }
+ defer os.RemoveAll(out)
+
+ fileData, err = os.ReadFile(out)
+ if err != nil {
+ return fmt.Errorf("failed reading file:%w", err)
+ }
+
+ } else {
+ // base 64 decode the file and write it somewhere
+ // that we will cleanup
+ fileData, err = base64.StdEncoding.DecodeString(input.File)
+ if err != nil {
+ return err
+ }
+ }
+
+ // Create a temporary file
+ outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
+ if err != nil {
+ return err
+ }
+ // write the base64 result
+ writer := bufio.NewWriter(outputFile)
+ _, err = writer.Write(fileData)
+ if err != nil {
+ outputFile.Close()
+ return err
+ }
+ outputFile.Close()
+ src = outputFile.Name()
+ defer os.RemoveAll(src)
+ }
+
+ log.Debug().Msgf("Parameter Config: %+v", config)
+
+ switch config.Backend {
+ case "stablediffusion":
+ config.Backend = model.StableDiffusionBackend
+ case "tinydream":
+ config.Backend = model.TinyDreamBackend
+ case "":
+ config.Backend = model.StableDiffusionBackend
+ }
+
+ sizeParts := strings.Split(input.Size, "x")
+ if len(sizeParts) != 2 {
+ return fmt.Errorf("invalid value for 'size'")
+ }
+ width, err := strconv.Atoi(sizeParts[0])
+ if err != nil {
+ return fmt.Errorf("invalid value for 'size'")
+ }
+ height, err := strconv.Atoi(sizeParts[1])
+ if err != nil {
+ return fmt.Errorf("invalid value for 'size'")
+ }
+
+ b64JSON := false
+ if input.ResponseFormat.Type == "b64_json" {
+ b64JSON = true
+ }
+ // src and clip_skip
+ var result []schema.Item
+ for _, i := range config.PromptStrings {
+ n := input.N
+ if input.N == 0 {
+ n = 1
+ }
+ for j := 0; j < n; j++ {
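+ // a prompt may carry a negative prompt after a "|" separator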
+ prompts := strings.Split(i, "|")
+ positive_prompt := prompts[0]
+ negative_prompt := ""
+ if len(prompts) > 1 {
+ negative_prompt = prompts[1]
+ }
+
+ mode := 0
+ step := config.Step
+ if step == 0 {
+ step = 15
+ }
+
+ if input.Mode != 0 {
+ mode = input.Mode
+ }
+
+ if input.Step != 0 {
+ step = input.Step
+ }
+
+ tempDir := ""
+ if !b64JSON {
+ tempDir = appConfig.ImageDir
+ }
+ // Create a temporary file
+ outputFile, err := os.CreateTemp(tempDir, "b64")
+ if err != nil {
+ return err
+ }
+ outputFile.Close()
+ output := outputFile.Name() + ".png"
+ // Rename the temporary file
+ err = os.Rename(outputFile.Name(), output)
+ if err != nil {
+ return err
+ }
+
+ baseURL := c.BaseURL()
+
+ fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig)
+ if err != nil {
+ return err
+ }
+ if err := fn(); err != nil {
+ return err
+ }
+
+ item := &schema.Item{}
+
+ if b64JSON {
+ defer os.RemoveAll(output)
+ data, err := os.ReadFile(output)
+ if err != nil {
+ return err
+ }
+ item.B64JSON = base64.StdEncoding.EncodeToString(data)
+ } else {
+ base := filepath.Base(output)
+ item.URL = baseURL + "/generated-images/" + base
+ }
+
+ result = append(result, *item)
+ }
+ }
+
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Data: result,
+ }
+
+ jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
+
// Return the prediction in the response body
- return c.JSON(rawResponse.Value)
+ return c.JSON(resp)
}
}
diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go
new file mode 100644
index 00000000..06e784b7
--- /dev/null
+++ b/core/http/endpoints/openai/inference.go
@@ -0,0 +1,55 @@
+package openai
+
+import (
+ "github.com/go-skynet/LocalAI/core/backend"
+ "github.com/go-skynet/LocalAI/core/config"
+
+ "github.com/go-skynet/LocalAI/core/schema"
+ model "github.com/go-skynet/LocalAI/pkg/model"
+)
+
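+// ComputeChoices runs the model prediction req.N times (defaulting to 1) and
+// appends each finetuned result to the returned choices via cb, accumulating
+// prompt and completion token usage across iterations.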
+func ComputeChoices(
+ req *schema.OpenAIRequest,
+ predInput string,
+ config *config.BackendConfig,
+ o *config.ApplicationConfig,
+ loader *model.ModelLoader,
+ cb func(string, *[]schema.Choice),
+ tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
+ n := req.N // number of completions to return
+ result := []schema.Choice{}
+
+ if n == 0 {
+ n = 1
+ }
+
+ images := []string{}
+ for _, m := range req.Messages {
+ images = append(images, m.StringImages...)
+ }
+
+ // get the model function to call for the result
+ predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
+ if err != nil {
+ return result, backend.TokenUsage{}, err
+ }
+
+ tokenUsage := backend.TokenUsage{}
+
+ for i := 0; i < n; i++ {
+ prediction, err := predFunc()
+ if err != nil {
+ return result, backend.TokenUsage{}, err
+ }
+
+ tokenUsage.Prompt += prediction.Usage.Prompt
+ tokenUsage.Completion += prediction.Usage.Completion
+
+ finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
+ cb(finetunedResponse, &result)
+	}
+	return result, tokenUsage, nil
+}
diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go
index 9bb2b2ca..04e611a2 100644
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -1,21 +1,61 @@
package openai
import (
+ "regexp"
+
+ "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/core/services"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
-func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
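+// ListModelsEndpoint returns the OpenAI-compatible model listing, merging configured models with loose model files found on disk.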
+func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- // If blank, no filter is applied.
+ models, err := ml.ListModels()
+ if err != nil {
+ return err
+ }
+	mm := map[string]interface{}{}
+
+ dataModels := []schema.OpenAIModel{}
+
+ var filterFn func(name string) bool
filter := c.Query("filter")
+
+ // If filter is not specified, do not filter the list by model name
+ if filter == "" {
+ filterFn = func(_ string) bool { return true }
+ } else {
+ // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
+ rxp, err := regexp.Compile(filter)
+ if err != nil {
+ return err
+ }
+ filterFn = func(name string) bool {
+ return rxp.MatchString(name)
+ }
+ }
+
// By default, exclude any loose files that are already referenced by a configuration file.
excludeConfigured := c.QueryBool("excludeConfigured", true)
- dataModels, err := lms.ListModels(filter, excludeConfigured)
- if err != nil {
- return err
+ // Start with the known configurations
+ for _, c := range cl.GetAllBackendConfigs() {
+ if excludeConfigured {
+ mm[c.Model] = nil
+ }
+
+ if filterFn(c.Name) {
+ dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
+ }
+ }
+
+ // Then iterate through the loose files:
+ for _, m := range models {
+		// and add them only if they aren't already referenced by a config and they pass the filter.
+ if _, exists := mm[m]; !exists && filterFn(m) {
+ dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
+ }
}
return c.JSON(struct {
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
new file mode 100644
index 00000000..369fb0b8
--- /dev/null
+++ b/core/http/endpoints/openai/request.go
@@ -0,0 +1,285 @@
+package openai
+
+import (
+ "context"
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "strings"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/grammar"
+ model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+ "github.com/rs/zerolog/log"
+)
+
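+// readRequest parses the request body into an OpenAIRequest, attaches a cancellable context derived from the application context, and resolves the model name to use.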
+func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
+ input := new(schema.OpenAIRequest)
+
+ // Get input data from the request body
+ if err := c.BodyParser(input); err != nil {
+ return "", nil, fmt.Errorf("failed parsing request body: %w", err)
+ }
+
+ received, _ := json.Marshal(input)
+
+ ctx, cancel := context.WithCancel(o.Context)
+ input.Context = ctx
+ input.Cancel = cancel
+
+ log.Debug().Msgf("Request received: %s", string(received))
+
+ modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
+
+ return modelFile, input, err
+}
+
+// getBase64Image checks whether the string is a URL; if so, it downloads the image into memory,
+// encodes it in base64, and returns the resulting base64 string.
+func getBase64Image(s string) (string, error) {
+ if strings.HasPrefix(s, "http") {
+ // download the image
+ resp, err := http.Get(s)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ // read the image data into memory
+ data, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return "", err
+ }
+
+ // encode the image data in base64
+ encoded := base64.StdEncoding.EncodeToString(data)
+
+ // return the base64 string
+ return encoded, nil
+ }
+
+	// if the string is instead prefixed with "data:image/jpeg;base64,", just drop the prefix
+ if strings.HasPrefix(s, "data:image/jpeg;base64,") {
+ return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
+ }
+	return "", fmt.Errorf("not a valid string")
+}
+
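+// updateRequestConfig overlays any parameters set on the request onto the backend config, so per-request values override the model defaults.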
+func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
+ if input.Echo {
+ config.Echo = input.Echo
+ }
+ if input.TopK != nil {
+ config.TopK = input.TopK
+ }
+ if input.TopP != nil {
+ config.TopP = input.TopP
+ }
+
+ if input.Backend != "" {
+ config.Backend = input.Backend
+ }
+
+ if input.ClipSkip != 0 {
+ config.Diffusers.ClipSkip = input.ClipSkip
+ }
+
+ if input.ModelBaseName != "" {
+ config.AutoGPTQ.ModelBaseName = input.ModelBaseName
+ }
+
+ if input.NegativePromptScale != 0 {
+ config.NegativePromptScale = input.NegativePromptScale
+ }
+
+ if input.UseFastTokenizer {
+ config.UseFastTokenizer = input.UseFastTokenizer
+ }
+
+ if input.NegativePrompt != "" {
+ config.NegativePrompt = input.NegativePrompt
+ }
+
+ if input.RopeFreqBase != 0 {
+ config.RopeFreqBase = input.RopeFreqBase
+ }
+
+ if input.RopeFreqScale != 0 {
+ config.RopeFreqScale = input.RopeFreqScale
+ }
+
+ if input.Grammar != "" {
+ config.Grammar = input.Grammar
+ }
+
+ if input.Temperature != nil {
+ config.Temperature = input.Temperature
+ }
+
+ if input.Maxtokens != nil {
+ config.Maxtokens = input.Maxtokens
+ }
+
+ switch stop := input.Stop.(type) {
+ case string:
+ if stop != "" {
+ config.StopWords = append(config.StopWords, stop)
+ }
+ case []interface{}:
+ for _, pp := range stop {
+ if s, ok := pp.(string); ok {
+ config.StopWords = append(config.StopWords, s)
+ }
+ }
+ }
+
+ if len(input.Tools) > 0 {
+ for _, tool := range input.Tools {
+ input.Functions = append(input.Functions, tool.Function)
+ }
+ }
+
+ if input.ToolsChoice != nil {
+ var toolChoice grammar.Tool
+
+ switch content := input.ToolsChoice.(type) {
+ case string:
+ _ = json.Unmarshal([]byte(content), &toolChoice)
+ case map[string]interface{}:
+ dat, _ := json.Marshal(content)
+ _ = json.Unmarshal(dat, &toolChoice)
+ }
+ input.FunctionCall = map[string]interface{}{
+ "name": toolChoice.Function.Name,
+ }
+ }
+
+ // Decode each request's message content
+ index := 0
+ for i, m := range input.Messages {
+ switch content := m.Content.(type) {
+ case string:
+ input.Messages[i].StringContent = content
+ case []interface{}:
+ dat, _ := json.Marshal(content)
+ c := []schema.Content{}
+ json.Unmarshal(dat, &c)
+ for _, pp := range c {
+ if pp.Type == "text" {
+ input.Messages[i].StringContent = pp.Text
+ } else if pp.Type == "image_url" {
+				// If pp.ImageURL is a URL, download the image and encode it in base64:
+				b64, err := getBase64Image(pp.ImageURL.URL)
+				if err == nil {
+					input.Messages[i].StringImages = append(input.Messages[i].StringImages, b64) // TODO: make sure that we only return base64 stuff
+ // set a placeholder for each image
+ input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
+ index++
+ } else {
+					log.Error().Err(err).Msg("failed encoding image")
+ }
+ }
+ }
+ }
+ }
+
+ if input.RepeatPenalty != 0 {
+ config.RepeatPenalty = input.RepeatPenalty
+ }
+
+ if input.FrequencyPenalty != 0 {
+ config.FrequencyPenalty = input.FrequencyPenalty
+ }
+
+ if input.PresencePenalty != 0 {
+ config.PresencePenalty = input.PresencePenalty
+ }
+
+ if input.Keep != 0 {
+ config.Keep = input.Keep
+ }
+
+ if input.Batch != 0 {
+ config.Batch = input.Batch
+ }
+
+ if input.IgnoreEOS {
+ config.IgnoreEOS = input.IgnoreEOS
+ }
+
+ if input.Seed != nil {
+ config.Seed = input.Seed
+ }
+
+ if input.TypicalP != nil {
+ config.TypicalP = input.TypicalP
+ }
+
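+	// Input may arrive as a single string, a list of strings, or lists of token IDs (JSON numbers decode as float64).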
+ switch inputs := input.Input.(type) {
+ case string:
+ if inputs != "" {
+ config.InputStrings = append(config.InputStrings, inputs)
+ }
+ case []interface{}:
+ for _, pp := range inputs {
+ switch i := pp.(type) {
+ case string:
+ config.InputStrings = append(config.InputStrings, i)
+ case []interface{}:
+ tokens := []int{}
+ for _, ii := range i {
+ tokens = append(tokens, int(ii.(float64)))
+ }
+ config.InputToken = append(config.InputToken, tokens)
+ }
+ }
+ }
+
+ // Can be either a string or an object
+ switch fnc := input.FunctionCall.(type) {
+ case string:
+ if fnc != "" {
+ config.SetFunctionCallString(fnc)
+ }
+ case map[string]interface{}:
+ var name string
+ n, exists := fnc["name"]
+ if exists {
+			nn, ok := n.(string)
+			if ok {
+ name = nn
+ }
+ }
+ config.SetFunctionCallNameString(name)
+ }
+
+ switch p := input.Prompt.(type) {
+ case string:
+ config.PromptStrings = append(config.PromptStrings, p)
+ case []interface{}:
+ for _, pp := range p {
+ if s, ok := pp.(string); ok {
+ config.PromptStrings = append(config.PromptStrings, s)
+ }
+ }
+ }
+}
+
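+// mergeRequestWithConfig loads the backend config for the model with the given loader options, then applies the per-request overrides from the OpenAI request.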
+func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
+ cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
+ config.LoadOptionDebug(debug),
+ config.LoadOptionThreads(threads),
+ config.LoadOptionContextSize(ctx),
+ config.LoadOptionF16(f16),
+ )
+
+	// Guard against a failed config load before applying overrides to a possibly-nil config.
+	if err != nil {
+		return nil, nil, err
+	}
+
+	// Set the parameters for the language model prediction
+	updateRequestConfig(cfg, input)
+
+	return cfg, input, nil
+}
diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index 572cec12..c7dd39e7 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -9,7 +9,8 @@ import (
"path/filepath"
"github.com/go-skynet/LocalAI/core/backend"
- fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+ "github.com/go-skynet/LocalAI/core/config"
+ model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -22,15 +23,17 @@ import (
// @Param file formData file true "file"
// @Success 200 {object} map[string]string "Response"
// @Router /v1/audio/transcriptions [post]
-func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error {
+func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- _, request, err := fce.OpenAIRequestFromContext(c, false)
+ m, input, err := readRequest(c, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
- // TODO: Investigate this file copy stuff later - potentially belongs in service.
-
+ config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
+ if err != nil {
+		return fmt.Errorf("failed reading parameters from request: %w", err)
+ }
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
@@ -62,16 +65,13 @@ func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.Tr
log.Debug().Msgf("Audio file copied to: %+v", dst)
- request.File = dst
-
- responseChannel := tbs.Transcribe(request)
- rawResponse := <-responseChannel
-
- if rawResponse.Error != nil {
- return rawResponse.Error
+ tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
+ if err != nil {
+ return err
}
- log.Debug().Msgf("Transcribed: %+v", rawResponse.Value)
+
+	log.Debug().Msgf("Transcribed: %+v", tr)
// TODO: handle different outputs here
- return c.Status(http.StatusOK).JSON(rawResponse.Value)
+ return c.Status(http.StatusOK).JSON(tr)
}
}
diff --git a/core/schema/transcription.go b/core/schema/whisper.go
similarity index 90%
rename from core/schema/transcription.go
rename to core/schema/whisper.go
index fe1799fa..41413c1f 100644
--- a/core/schema/transcription.go
+++ b/core/schema/whisper.go
@@ -10,7 +10,7 @@ type Segment struct {
Tokens []int `json:"tokens"`
}
-type TranscriptionResult struct {
+type Result struct {
Segments []Segment `json:"segments"`
Text string `json:"text"`
}
diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go
index a610432c..979a67a3 100644
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -15,22 +15,22 @@ import (
gopsutil "github.com/shirou/gopsutil/v3/process"
)
-type BackendMonitorService struct {
+type BackendMonitor struct {
configLoader *config.BackendConfigLoader
modelLoader *model.ModelLoader
options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
}
-func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService {
- return &BackendMonitorService{
+func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
+ return BackendMonitor{
configLoader: configLoader,
modelLoader: modelLoader,
options: appConfig,
}
}
-func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) {
- config, exists := bms.configLoader.GetBackendConfig(modelName)
+func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
+ config, exists := bm.configLoader.GetBackendConfig(modelName)
var backendId string
if exists {
backendId = config.Model
@@ -46,8 +46,8 @@ func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string)
return backendId, nil
}
-func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
- config, exists := bms.configLoader.GetBackendConfig(model)
+func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
+ config, exists := bm.configLoader.GetBackendConfig(model)
var backend string
if exists {
backend = config.Model
@@ -60,7 +60,7 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche
backend = fmt.Sprintf("%s.bin", backend)
}
- pid, err := bms.modelLoader.GetGRPCPID(backend)
+ pid, err := bm.modelLoader.GetGRPCPID(backend)
if err != nil {
log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
@@ -101,12 +101,12 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche
}, nil
}
-func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
- backendId, err := bms.getModelLoaderIDFromModelName(modelName)
+func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
+ backendId, err := bm.getModelLoaderIDFromModelName(modelName)
if err != nil {
return nil, err
}
- modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
+ modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
if modelAddr == "" {
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
}
@@ -114,7 +114,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
if rpcErr != nil {
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
- val, slbErr := bms.SampleLocalBackendProcess(backendId)
+ val, slbErr := bm.SampleLocalBackendProcess(backendId)
if slbErr != nil {
return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
}
@@ -131,10 +131,10 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
return status, nil
}
-func (bms BackendMonitorService) ShutdownModel(modelName string) error {
- backendId, err := bms.getModelLoaderIDFromModelName(modelName)
+func (bm BackendMonitor) ShutdownModel(modelName string) error {
+ backendId, err := bm.getModelLoaderIDFromModelName(modelName)
if err != nil {
return err
}
- return bms.modelLoader.ShutdownModel(backendId)
+ return bm.modelLoader.ShutdownModel(backendId)
}
diff --git a/core/services/gallery.go b/core/services/gallery.go
index 1ef8e3e2..b068abbb 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -3,18 +3,14 @@ package services
import (
"context"
"encoding/json"
- "errors"
"os"
- "path/filepath"
"strings"
"sync"
"github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/embedded"
- "github.com/go-skynet/LocalAI/pkg/downloader"
"github.com/go-skynet/LocalAI/pkg/gallery"
+ "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
@@ -33,6 +29,18 @@ func NewGalleryService(modelPath string) *GalleryService {
}
}
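+// prepareModel fetches the gallery config for the requested model, appends any additional files, and installs it into the model path.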
+func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error {
+
+ config, err := gallery.GetGalleryConfigFromURL(req.URL)
+ if err != nil {
+ return err
+ }
+
+ config.Files = append(config.Files, req.AdditionalFiles...)
+
+ return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
+}
+
func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) {
g.Lock()
defer g.Unlock()
@@ -84,10 +92,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
}
} else if op.ConfigURL != "" {
- PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
+ startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
err = cl.Preload(g.modelPath)
} else {
- err = prepareModel(g.modelPath, op.Req, progressCallback)
+ err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
}
if err != nil {
@@ -119,12 +127,13 @@ type galleryModel struct {
ID string `json:"id"`
}
-func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error {
+func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
var err error
for _, r := range requests {
utils.ResetDownloadTimers()
if r.ID == "" {
- err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
+ err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
+
} else {
if strings.Contains(r.ID, "@") {
err = gallery.InstallModelFromGallery(
@@ -149,7 +158,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g
return err
}
- return processRequests(modelPath, galleries, requests)
+ return processRequests(modelPath, s, cl, galleries, requests)
}
func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
@@ -159,90 +168,5 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader,
return err
}
- return processRequests(modelPath, galleries, requests)
-}
-
-// PreloadModelsConfigurations will preload models from the given list of URLs
-// It will download the model if it is not already present in the model path
-// It will also try to resolve if the model is an embedded model YAML configuration
-func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) {
- for _, url := range models {
-
- // As a best effort, try to resolve the model from the remote library
- // if it's not resolved we try with the other method below
- if modelLibraryURL != "" {
- lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
- if err == nil {
- if lib[url] != "" {
- log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
- url = lib[url]
- }
- }
- }
-
- url = embedded.ModelShortURL(url)
- switch {
- case embedded.ExistsInModelsLibrary(url):
- modelYAML, err := embedded.ResolveContent(url)
- // If we resolve something, just save it to disk and continue
- if err != nil {
- log.Error().Err(err).Msg("error resolving model content")
- continue
- }
-
- log.Debug().Msgf("[startup] resolved embedded model: %s", url)
- md5Name := utils.MD5(url)
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
- log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
- }
- case downloader.LooksLikeURL(url):
- log.Debug().Msgf("[startup] resolved model to download: %s", url)
-
- // md5 of model name
- md5Name := utils.MD5(url)
-
- // check if file exists
- if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
- utils.DisplayDownloadFunction(fileName, current, total, percent)
- })
- if err != nil {
- log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
- }
- }
- default:
- if _, err := os.Stat(url); err == nil {
- log.Debug().Msgf("[startup] resolved local model: %s", url)
- // copy to modelPath
- md5Name := utils.MD5(url)
-
- modelYAML, err := os.ReadFile(url)
- if err != nil {
- log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
- continue
- }
-
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
- log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
- }
- } else {
- log.Warn().Msgf("[startup] failed resolving model '%s'", url)
- }
- }
- }
-}
-
-func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error {
-
- config, err := gallery.GetGalleryConfigFromURL(req.URL)
- if err != nil {
- return err
- }
-
- config.Files = append(config.Files, req.AdditionalFiles...)
-
- return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
+ return processRequests(modelPath, s, cl, galleries, requests)
}
diff --git a/core/services/list_models.go b/core/services/list_models.go
deleted file mode 100644
index a21e6faf..00000000
--- a/core/services/list_models.go
+++ /dev/null
@@ -1,72 +0,0 @@
-package services
-
-import (
- "regexp"
-
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/model"
-)
-
-type ListModelsService struct {
- bcl *config.BackendConfigLoader
- ml *model.ModelLoader
- appConfig *config.ApplicationConfig
-}
-
-func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService {
- return &ListModelsService{
- bcl: bcl,
- ml: ml,
- appConfig: appConfig,
- }
-}
-
-func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
-
- models, err := lms.ml.ListModels()
- if err != nil {
- return nil, err
- }
-
- var mm map[string]interface{} = map[string]interface{}{}
-
- dataModels := []schema.OpenAIModel{}
-
- var filterFn func(name string) bool
-
- // If filter is not specified, do not filter the list by model name
- if filter == "" {
- filterFn = func(_ string) bool { return true }
- } else {
- // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
- rxp, err := regexp.Compile(filter)
- if err != nil {
- return nil, err
- }
- filterFn = func(name string) bool {
- return rxp.MatchString(name)
- }
- }
-
- // Start with the known configurations
- for _, c := range lms.bcl.GetAllBackendConfigs() {
- if excludeConfigured {
- mm[c.Model] = nil
- }
-
- if filterFn(c.Name) {
- dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
- }
- }
-
- // Then iterate through the loose files:
- for _, m := range models {
- // And only adds them if they shouldn't be skipped.
- if _, exists := mm[m]; !exists && filterFn(m) {
- dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
- }
- }
-
- return dataModels, nil
-}
diff --git a/core/services/openai.go b/core/services/openai.go
deleted file mode 100644
index 7a2679ad..00000000
--- a/core/services/openai.go
+++ /dev/null
@@ -1,808 +0,0 @@
-package services
-
-import (
- "encoding/json"
- "errors"
- "fmt"
- "strings"
- "sync"
- "time"
-
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/concurrency"
- "github.com/go-skynet/LocalAI/pkg/grammar"
- "github.com/go-skynet/LocalAI/pkg/model"
- "github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/google/uuid"
- "github.com/imdario/mergo"
- "github.com/rs/zerolog/log"
-)
-
-type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration
-
-type endpointConfiguration struct {
- SchemaObject string
- TemplatePath string
- TemplateData model.PromptTemplateData
- ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice
- CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse]
- TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse]
-}
-
-// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later.
-func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
- if resp.Error != nil || resp.Value == nil {
- return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
- }
- return concurrency.ErrorOr[*schema.OpenAIResponse]{
- Value: &schema.OpenAIResponse{
- Choices: []schema.Choice{
- {
- Text: resp.Value.Response,
- },
- },
- Usage: schema.OpenAIUsage{
- PromptTokens: resp.Value.Usage.Prompt,
- CompletionTokens: resp.Value.Usage.Completion,
- TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
- },
- },
- }
-}
-
-// TODO: Consider alternative names for this.
-// The purpose of this struct is to hold a reference to the OpenAI request context information
-// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it
-type OpenAIRequestTraceID struct {
- ID string
- Created int
-}
-
-// This type split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code from the OpenAI specific higher level functionality
-type OpenAIService struct {
- bcl *config.BackendConfigLoader
- ml *model.ModelLoader
- appConfig *config.ApplicationConfig
- llmbs *backend.LLMBackendService
-}
-
-func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService {
- return &OpenAIService{
- bcl: bcl,
- ml: ml,
- appConfig: appConfig,
- llmbs: llmbs,
- }
-}
-
-// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE???
-func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) {
- return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig)
-}
-
-// TODO: It would be a lot less messy to make a return struct that had references to each of these channels
-// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()?
-// finalResultsChannel is the primary async return path: one result for the entire request.
-// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing for testing? One bundle of LLMResponseBundle per PromptString? Gets all N completions for a single prompt.
-// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request" Request is available to attempt tracing???
-// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens!
-func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
- traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
- completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
-
- return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
- return endpointConfiguration{
- SchemaObject: "text_completion",
- TemplatePath: bc.TemplateConfig.Completion,
- TemplateData: model.PromptTemplateData{
- SystemPrompt: bc.SystemPrompt,
- },
- ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
- return schema.Choice{
- Index: promptIndex,
- FinishReason: "stop",
- Text: resp.Response,
- }
- },
- CompletionMappingFn: simpleMapper,
- TokenMappingFn: simpleMapper,
- }
- }, notifyOnPromptResult, notifyOnToken, nil)
-}
-
-func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
- traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
- completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
-
- return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
-
- return endpointConfiguration{
- SchemaObject: "edit",
- TemplatePath: bc.TemplateConfig.Edit,
- TemplateData: model.PromptTemplateData{
- SystemPrompt: bc.SystemPrompt,
- Instruction: request.Instruction,
- },
- ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
- return schema.Choice{
- Index: promptIndex,
- FinishReason: "stop",
- Text: resp.Response,
- }
- },
- CompletionMappingFn: simpleMapper,
- TokenMappingFn: simpleMapper,
- }
- }, notifyOnPromptResult, notifyOnToken, nil)
-}
-
-func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
- traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
- completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
-
- return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil)
-}
-
-func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
- traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
- completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
-
- if initialTraceID == nil {
- traceID = &OpenAIRequestTraceID{
- ID: uuid.New().String(),
- Created: int(time.Now().Unix()),
- }
- } else {
- traceID = initialTraceID
- }
-
- bc, request, err := oais.getConfig(request)
- if err != nil {
- log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration")
- return
- }
-
- if request.ResponseFormat.Type == "json_object" {
- request.Grammar = grammar.JSONBNF
- }
-
- bc.Grammar = request.Grammar
-
- if request.Stream && len(bc.PromptStrings) > 1 {
- log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?")
- }
-
- rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- finalResultChannel = rawFinalResultChannel
- promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{}
- var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
- var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
- if notifyOnPromptResult {
- rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- }
- if notifyOnToken {
- rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- }
-
- promptResultsChannelLock := sync.Mutex{}
-
- endpointConfig := endpointConfigFn(bc, request)
-
- if len(endpointConfig.TemplatePath) == 0 {
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
- endpointConfig.TemplatePath = bc.Model
- } else {
- log.Warn().Msgf("failed to find any template for %+v", request)
- }
- }
-
- setupWG := sync.WaitGroup{}
- var prompts []string
- if lPS := len(bc.PromptStrings); lPS > 0 {
- setupWG.Add(lPS)
- prompts = bc.PromptStrings
- } else {
- setupWG.Add(len(bc.InputStrings))
- prompts = bc.InputStrings
- }
-
- var setupError error = nil
-
- for pI, p := range prompts {
-
- go func(promptIndex int, prompt string) {
- if endpointConfig.TemplatePath != "" {
- promptTemplateData := model.PromptTemplateData{
- Input: prompt,
- }
- err := mergo.Merge(promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride)
- if err == nil {
- templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData)
- if err == nil {
- prompt = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", prompt)
- }
- }
- }
-
- log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt)
- promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc,
- func(r *backend.LLMResponse) schema.Choice {
- return endpointConfig.ResultMappingFn(r, promptIndex)
- }, notifyOnPromptResult, notifyOnToken)
- if err != nil {
- log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err)
- promptResultsChannelLock.Lock()
- setupError = errors.Join(setupError, err)
- promptResultsChannelLock.Unlock()
- setupWG.Done()
- return
- }
- if notifyOnPromptResult {
- concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true)
- }
- if notifyOnToken {
- concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true)
- }
- promptResultsChannelLock.Lock()
- promptResultsChannels = append(promptResultsChannels, promptResultsChannel)
- promptResultsChannelLock.Unlock()
- setupWG.Done()
- }(pI, p)
-
- }
- setupWG.Wait()
-
- // If any of the setup goroutines experienced an error, quit early here.
- if setupError != nil {
- go func() {
- log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup")
- rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
- close(rawFinalResultChannel)
- }()
- return
- }
-
- initialResponse := &schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model,
- Object: endpointConfig.SchemaObject,
- Usage: schema.OpenAIUsage{},
- }
-
- // utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) {
- concurrency.SliceOfChannelsReducer(
- promptResultsChannels, rawFinalResultChannel,
- func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
-
- if iv.Error != nil {
- result.Error = iv.Error
- return result
- }
- result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt
- result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion
- result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens
-
- result.Value.Choices = append(result.Value.Choices, iv.Value.Response...)
-
- return result
- }, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true)
-
- completionsChannel = rawCompletionsChannel
- tokenChannel = rawTokenChannel
-
- return
-}
-
-// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes
-// this is not a final decision -- just a reality of moving a lot of parts at once
-// / This has _become_ Chat which wasn't the goal... More cleanup in the future once it's stable?
-func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
- traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
- completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
-
- if initialTraceID == nil {
- traceID = &OpenAIRequestTraceID{
- ID: uuid.New().String(),
- Created: int(time.Now().Unix()),
- }
- } else {
- traceID = initialTraceID
- }
-
- bc, request, err := oais.getConfig(request)
- if err != nil {
- return
- }
-
- // Allow the user to set custom actions via config file
- // to be "embedded" in each model
- noActionName := "answer"
- noActionDescription := "use this action to answer without performing any action"
-
- if bc.FunctionsConfig.NoActionFunctionName != "" {
- noActionName = bc.FunctionsConfig.NoActionFunctionName
- }
- if bc.FunctionsConfig.NoActionDescriptionName != "" {
- noActionDescription = bc.FunctionsConfig.NoActionDescriptionName
- }
-
- if request.ResponseFormat.Type == "json_object" {
- request.Grammar = grammar.JSONBNF
- }
-
- bc.Grammar = request.Grammar
-
- processFunctions := false
- funcs := grammar.Functions{}
- // process functions if we have any defined or if we have a function call string
- if len(request.Functions) > 0 && bc.ShouldUseFunctions() {
- log.Debug().Msgf("Response needs to process functions")
-
- processFunctions = true
-
- noActionGrammar := grammar.Function{
- Name: noActionName,
- Description: noActionDescription,
- Parameters: map[string]interface{}{
- "properties": map[string]interface{}{
- "message": map[string]interface{}{
- "type": "string",
- "description": "The message to reply the user with",
- }},
- },
- }
-
- // Append the no action function
- funcs = append(funcs, request.Functions...)
- if !bc.FunctionsConfig.DisableNoAction {
- funcs = append(funcs, noActionGrammar)
- }
-
- // Force picking one of the functions by the request
- if bc.FunctionToCall() != "" {
- funcs = funcs.Select(bc.FunctionToCall())
- }
-
- // Update input grammar
- jsStruct := funcs.ToJSONStructure()
- bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls)
- } else if request.JSONFunctionGrammarObject != nil {
- bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls)
- }
-
- if request.Stream && processFunctions {
- log.Warn().Msg("Streaming + Functions is highly experimental in this version")
- }
-
- var predInput string
-
- if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions {
-
- suppressConfigSystemPrompt := false
- mess := []string{}
- for messageIndex, i := range request.Messages {
- var content string
- role := i.Role
-
- // if function call, we might want to customize the role so we can display better that the "assistant called a json action"
- // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
- if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
- roleFn := "assistant_function_call"
- r := bc.Roles[roleFn]
- if r != "" {
- role = roleFn
- }
- }
- r := bc.Roles[role]
- contentExists := i.Content != nil && i.StringContent != ""
-
- fcall := i.FunctionCall
- if len(i.ToolCalls) > 0 {
- fcall = i.ToolCalls
- }
-
- // First attempt to populate content via a chat message specific template
- if bc.TemplateConfig.ChatMessage != "" {
- chatMessageData := model.ChatMessageTemplateData{
- SystemPrompt: bc.SystemPrompt,
- Role: r,
- RoleName: role,
- Content: i.StringContent,
- FunctionCall: fcall,
- FunctionName: i.Name,
- LastMessage: messageIndex == (len(request.Messages) - 1),
- Function: bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)),
- MessageIndex: messageIndex,
- }
- templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData)
- if err != nil {
- log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err)
- } else {
- if templatedChatMessage == "" {
- log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData)
- continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
- }
- log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
- content = templatedChatMessage
- }
- }
- marshalAnyRole := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + fmt.Sprint(r, " ", string(j))
- } else {
- content = fmt.Sprint(r, " ", string(j))
- }
- }
- }
- marshalAny := func(f any) {
- j, err := json.Marshal(f)
- if err == nil {
- if contentExists {
- content += "\n" + string(j)
- } else {
- content = string(j)
- }
- }
- }
- // If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
- if content == "" {
- if r != "" {
- if contentExists {
- content = fmt.Sprint(r, i.StringContent)
- }
-
- if i.FunctionCall != nil {
- marshalAnyRole(i.FunctionCall)
- }
- } else {
- if contentExists {
- content = fmt.Sprint(i.StringContent)
- }
-
- if i.FunctionCall != nil {
- marshalAny(i.FunctionCall)
- }
-
- if i.ToolCalls != nil {
- marshalAny(i.ToolCalls)
- }
- }
- // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
- if contentExists && role == "system" {
- suppressConfigSystemPrompt = true
- }
- }
-
- mess = append(mess, content)
- }
-
- predInput = strings.Join(mess, "\n")
-
- log.Debug().Msgf("Prompt (before templating): %s", predInput)
-
- templateFile := ""
- // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
- if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
- templateFile = bc.Model
- }
-
- if bc.TemplateConfig.Chat != "" && !processFunctions {
- templateFile = bc.TemplateConfig.Chat
- }
-
- if bc.TemplateConfig.Functions != "" && processFunctions {
- templateFile = bc.TemplateConfig.Functions
- }
-
- if templateFile != "" {
- templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
- SystemPrompt: bc.SystemPrompt,
- SuppressSystemPrompt: suppressConfigSystemPrompt,
- Input: predInput,
- Functions: funcs,
- })
- if err == nil {
- predInput = templatedInput
- log.Debug().Msgf("Template found, input modified to: %s", predInput)
- } else {
- log.Debug().Msgf("Template failed loading: %s", err.Error())
- }
- }
- }
- log.Debug().Msgf("Prompt (after templating): %s", predInput)
- if processFunctions {
- log.Debug().Msgf("Grammar: %+v", bc.Grammar)
- }
-
- rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
- var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
- if notifyOnPromptResult {
- rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- }
- if notifyOnToken {
- rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
- }
-
- rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice {
- return schema.Choice{
- Index: 0, // ???
- FinishReason: "stop",
- Message: &schema.Message{
- Role: "assistant",
- Content: resp.Response,
- },
- }
- }, notifyOnPromptResult, notifyOnToken)
-
- chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
- if resp.Error != nil || resp.Value == nil {
- return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
- }
- return concurrency.ErrorOr[*schema.OpenAIResponse]{
- Value: &schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{
- {
- Delta: &schema.Message{
- Role: "assistant",
- Content: resp.Value.Response,
- },
- Index: 0,
- },
- },
- Object: "chat.completion.chunk",
- Usage: schema.OpenAIUsage{
- PromptTokens: resp.Value.Usage.Prompt,
- CompletionTokens: resp.Value.Usage.Completion,
- TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
- },
- },
- }
- }
-
- if notifyOnPromptResult {
- concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true)
- }
- if notifyOnToken {
- concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true)
- }
-
- go func() {
- rawResult := <-rawResultChannel
- if rawResult.Error != nil {
- log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error)
- return
- }
- llmResponseChoices := rawResult.Value.Response
-
- if processFunctions && len(llmResponseChoices) > 1 {
- log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices))
- log.Debug().Msgf("%+v", llmResponseChoices)
- }
-
- for _, result := range rawResult.Value.Response {
- // If no functions, just return the raw result.
- if !processFunctions {
-
- resp := schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{result},
- Object: "chat.completion.chunk",
- Usage: schema.OpenAIUsage{
- PromptTokens: rawResult.Value.Usage.Prompt,
- CompletionTokens: rawResult.Value.Usage.Completion,
- TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
- },
- }
-
- rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
-
- continue
- }
- // At this point, things are function specific!
-
- // Oh no this can't be the right way to do this... but it works. Save us, mudler!
- fString := fmt.Sprintf("%s", result.Message.Content)
- results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls)
- noActionToRun := (len(results) > 0 && results[0].name == noActionName)
-
- if noActionToRun {
- log.Debug().Msg("-- noActionToRun branch --")
- initialMessage := schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}},
- Object: "stop",
- }
- rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
-
- result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput)
- if err != nil {
- log.Error().Msgf("error handling question: %s", err.Error())
- return
- }
-
- resp := schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
- Object: "chat.completion.chunk",
- Usage: schema.OpenAIUsage{
- PromptTokens: rawResult.Value.Usage.Prompt,
- CompletionTokens: rawResult.Value.Usage.Completion,
- TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
- },
- }
-
- rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
-
- } else {
- log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results)
- for i, ss := range results {
- name, args := ss.name, ss.arguments
-
- initialMessage := schema.OpenAIResponse{
- ID: traceID.ID,
- Created: traceID.Created,
- Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
- Choices: []schema.Choice{{
- FinishReason: "function_call",
- Message: &schema.Message{
- Role: "assistant",
- ToolCalls: []schema.ToolCall{
- {
- Index: i,
- ID: traceID.ID,
- Type: "function",
- FunctionCall: schema.FunctionCall{
- Name: name,
- Arguments: args,
- },
- },
- },
- }}},
- Object: "chat.completion.chunk",
- }
- rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
- }
- }
- }
-
- close(rawFinalResultChannel)
- }()
-
- finalResultChannel = rawFinalResultChannel
- completionsChannel = rawCompletionsChannel
- tokenChannel = rawTokenChannel
- return
-}
-
-func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) {
- log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply")
-
- // If there is a message that the LLM already sends as part of the JSON reply, use it
- arguments := map[string]interface{}{}
- json.Unmarshal([]byte(args), &arguments)
- m, exists := arguments["message"]
- if exists {
- switch message := m.(type) {
- case string:
- if message != "" {
- log.Debug().Msgf("Reply received from LLM: %s", message)
- message = oais.llmbs.Finetune(*config, prompt, message)
- log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
-
- return message, nil
- }
- }
- }
-
- log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
- // Otherwise ask the LLM to understand the JSON output and the context, and return a message
- // Note: This costs (in term of CPU/GPU) another computation
- config.Grammar = ""
- images := []string{}
- for _, m := range input.Messages {
- images = append(images, m.StringImages...)
- }
-
- resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{
- Text: prompt,
- Images: images,
- RawMessages: input.Messages, // Experimental
- }, config, false)
-
- if err != nil {
- log.Error().Msgf("inference setup error: %s", err.Error())
- return "", err
- }
-
- raw := <-resultChannel
- if raw.Error != nil {
- log.Error().Msgf("inference error: %q", raw.Error.Error())
- return "", err
- }
- if raw.Value == nil {
- log.Warn().Msgf("nil inference response")
- return "", nil
- }
- return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil
-}
-
-type funcCallResults struct {
- name string
- arguments string
-}
-
-func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
-
- results := []funcCallResults{}
-
- // TODO: use generics to avoid this code duplication
- if multipleResults {
- ss := []map[string]interface{}{}
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
-
- for _, s := range ss {
- func_name, ok := s["function"]
- if !ok {
- continue
- }
- args, ok := s["arguments"]
- if !ok {
- continue
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- continue
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
- } else {
- // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
- ss := map[string]interface{}{}
- // This prevent newlines to break JSON parsing for clients
- s := utils.EscapeNewLines(llmresult)
- if err := json.Unmarshal([]byte(s), &ss); err != nil {
- log.Error().Msgf("error unmarshalling JSON: %s", err.Error())
- return results
- }
-
- // The grammar defines the function name as "function", while OpenAI returns "name"
- func_name, ok := ss["function"]
- if !ok {
- log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult)
- return results
- }
- // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
- args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
- if !ok {
- log.Debug().Msg("ss[arguments] is not OK!")
- return results
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- log.Debug().Msgf("unexpected func_name: %+v", func_name)
- return results
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
- return results
-}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 92ccaa9d..6298f034 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -4,21 +4,17 @@ import (
"fmt"
"os"
- "github.com/go-skynet/LocalAI/core"
- "github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
- openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up.
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
- "github.com/go-skynet/LocalAI/pkg/utils"
+ pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
-// (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
-func Startup(opts ...config.AppOption) (*core.Application, error) {
+func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
options := config.NewApplicationConfig(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel)
@@ -31,75 +27,68 @@ func Startup(opts ...config.AppOption) (*core.Application, error) {
// Make sure directories exists
if options.ModelPath == "" {
- return nil, fmt.Errorf("options.ModelPath cannot be empty")
+ return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
}
err := os.MkdirAll(options.ModelPath, 0755)
if err != nil {
- return nil, fmt.Errorf("unable to create ModelPath: %q", err)
+ return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.ImageDir != "" {
err := os.MkdirAll(options.ImageDir, 0755)
if err != nil {
- return nil, fmt.Errorf("unable to create ImageDir: %q", err)
+ return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.AudioDir != "" {
err := os.MkdirAll(options.AudioDir, 0755)
if err != nil {
- return nil, fmt.Errorf("unable to create AudioDir: %q", err)
+ return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0755)
if err != nil {
- return nil, fmt.Errorf("unable to create UploadDir: %q", err)
- }
- }
- if options.ConfigsDir != "" {
- err := os.MkdirAll(options.ConfigsDir, 0755)
- if err != nil {
- return nil, fmt.Errorf("unable to create ConfigsDir: %q", err)
+ return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
}
}
- // Load config jsons
- utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles)
- utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants)
- utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles)
+	// Preload model configurations from the remote library and any model URLs before loading local configs.
+ pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
- app := createApplication(options)
+ cl := config.NewBackendConfigLoader()
+ ml := model.NewModelLoader(options.ModelPath)
- services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
+ configLoaderOpts := options.ToConfigLoaderOptions()
- if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
+ if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config files")
}
if options.ConfigFile != "" {
- if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
+ if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config file")
}
}
- if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil {
+ if err := cl.Preload(options.ModelPath); err != nil {
log.Error().Err(err).Msg("error downloading models")
}
if options.PreloadJSONModels != "" {
- if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil {
- return nil, err
+ if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
+ return nil, nil, nil, err
}
}
if options.PreloadModelsFromPath != "" {
- if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil {
- return nil, err
+ if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
+ return nil, nil, nil, err
}
}
if options.Debug {
- for _, v := range app.BackendConfigLoader.ListBackendConfigs() {
- cfg, _ := app.BackendConfigLoader.GetBackendConfig(v)
+ for _, v := range cl.ListBackendConfigs() {
+ cfg, _ := cl.GetBackendConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
}
}
@@ -117,17 +106,17 @@ func Startup(opts ...config.AppOption) (*core.Application, error) {
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
- app.ModelLoader.StopAllGRPC()
+ ml.StopAllGRPC()
}()
if options.WatchDog {
wd := model.NewWatchDog(
- app.ModelLoader,
+ ml,
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
- app.ModelLoader.SetWatchDog(wd)
+ ml.SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
@@ -137,35 +126,5 @@ func Startup(opts ...config.AppOption) (*core.Application, error) {
}
log.Info().Msg("core/startup process completed!")
- return app, nil
-}
-
-// In Lieu of a proper DI framework, this function wires up the Application manually.
-// This is in core/startup rather than core/state.go to keep package references clean!
-func createApplication(appConfig *config.ApplicationConfig) *core.Application {
- app := &core.Application{
- ApplicationConfig: appConfig,
- BackendConfigLoader: config.NewBackendConfigLoader(),
- ModelLoader: model.NewModelLoader(appConfig.ModelPath),
- }
-
- var err error
-
- app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-
- app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath)
- app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
- app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
-
- app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
- if err != nil {
- log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service")
- }
-
- return app
+ return cl, ml, options, nil
}
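With this change, Startup no longer returns a wired-up *core.Application; callers receive the backend config loader, the model loader and the application config directly. A minimal caller sketch under the new signature (config.WithModelPath is assumed to exist alongside the AppOptions shown elsewhere in this series):

    package main

    import (
        "log"

        "github.com/go-skynet/LocalAI/core/config"
        "github.com/go-skynet/LocalAI/core/startup"
    )

    func main() {
        // Startup now hands back three core handles plus an error.
        cl, ml, appConfig, err := startup.Startup(
            config.WithModelPath("/models"), // assumed option name
        )
        if err != nil {
            log.Fatalf("startup failed: %v", err)
        }
        _ = cl        // *config.BackendConfigLoader: per-model configs
        _ = ml        // *model.ModelLoader: backend process lifecycle
        _ = appConfig // *config.ApplicationConfig: global settings
    }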
diff --git a/core/state.go b/core/state.go
deleted file mode 100644
index cf0d614b..00000000
--- a/core/state.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package core
-
-import (
- "github.com/go-skynet/LocalAI/core/backend"
- "github.com/go-skynet/LocalAI/core/config"
- "github.com/go-skynet/LocalAI/core/services"
- "github.com/go-skynet/LocalAI/pkg/model"
-)
-
-// TODO: Can I come up with a better name or location for this?
-// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
-// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
-type Application struct {
-
- // Application-Level Config
- ApplicationConfig *config.ApplicationConfig
- // ApplicationState *ApplicationState
-
- // Core Low-Level Services
- BackendConfigLoader *config.BackendConfigLoader
- ModelLoader *model.ModelLoader
-
- // Backend Services
- EmbeddingsBackendService *backend.EmbeddingsBackendService
- ImageGenerationBackendService *backend.ImageGenerationBackendService
- LLMBackendService *backend.LLMBackendService
- TranscriptionBackendService *backend.TranscriptionBackendService
- TextToSpeechBackendService *backend.TextToSpeechBackendService
-
- // LocalAI System Services
- BackendMonitorService *services.BackendMonitorService
- GalleryService *services.GalleryService
- ListModelsService *services.ListModelsService
- LocalAIMetricsService *services.LocalAIMetricsService
- OpenAIService *services.OpenAIService
-}
-
-// TODO [NEXT PR?]: Break up ApplicationConfig.
-// Migrate over stuff that is not set via config at all - especially runtime stuff
-type ApplicationState struct {
-}
diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
deleted file mode 100644
index c33bafe1..00000000
--- a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
+++ /dev/null
@@ -1,25 +0,0 @@
-meta {
- name: -completions Stream
- type: http
- seq: 4
-}
-
-post {
- url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
- body: json
- auth: none
-}
-
-headers {
- Content-Type: application/json
-}
-
-body:json {
- {
- "model": "{{DEFAULT_MODEL}}",
- "prompt": "function downloadFile(string url, string outputPath) {",
- "max_tokens": 256,
- "temperature": 0.5,
- "stream": true
- }
-}
diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go
deleted file mode 100644
index 324e8cc5..00000000
--- a/pkg/concurrency/concurrency.go
+++ /dev/null
@@ -1,135 +0,0 @@
-package concurrency
-
-import (
- "sync"
-)
-
-// TODO: closeWhenDone bool parameter ::
-// It currently is experimental, and therefore exists.
-// Is there ever a situation to use false?
-
-// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type.
-// mappingFn allows the caller to convert from the input type to the output type
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
-// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
-func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup {
- var wg sync.WaitGroup
- wg.Add(len(individualResultChannels))
- mergingFn := func(c <-chan IndividualResultType) {
- for r := range c {
- mr, err := mappingFn(r)
- if err == nil {
- outputChannel <- mr
- }
- }
- wg.Done()
- }
- for _, irc := range individualResultChannels {
- go mergingFn(irc)
- }
- if closeWhenDone {
- go func() {
- wg.Wait()
- close(outputChannel)
- }()
- }
-
- return &wg
-}
-
-// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE.
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
-// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
-func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup {
- return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone)
-}
-
-// This function is used to merge the results of a slice of channels of a specific result type down to a single succcess result channel of a second type, and an error channel
-// mappingFn allows the caller to convert from the input type to the output type
-// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures.
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
-// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
-func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup {
- var wg sync.WaitGroup
- wg.Add(len(individualResultChannels))
- mergingFn := func(c <-chan ErrorOr[IndividualResultType]) {
- for r := range c {
- if r.Error != nil {
- errorChannel <- r.Error
- } else {
- mv, err := mappingFn(r.Value)
- if err != nil {
- errorChannel <- err
- } else {
- successChannel <- mv
- }
- }
- }
- wg.Done()
- }
- for _, irc := range individualResultChannels {
- go mergingFn(irc)
- }
- if closeWhenDone {
- go func() {
- wg.Wait()
- close(successChannel)
- close(errorChannel)
- }()
- }
- return &wg
-}
-
-// This function is used to reduce down the results of a slice of channels of a specific result type down to a single result value of a second type.
-// reducerFn allows the caller to convert from the input type to the output type
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
-// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
-func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType,
- reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) {
- wg = &sync.WaitGroup{}
- wg.Add(len(individualResultsChannels))
- reduceLock := sync.Mutex{}
- reducingFn := func(c <-chan InputResultType) {
- for iv := range c {
- reduceLock.Lock()
- initialValue = reducerFn(iv, initialValue)
- reduceLock.Unlock()
- }
- wg.Done()
- }
- for _, irc := range individualResultsChannels {
- go reducingFn(irc)
- }
- go func() {
- wg.Wait()
- outputChannel <- initialValue
- if closeWhenDone {
- close(outputChannel)
- }
- }()
- return wg
-}
-
-// This function is primarily designed to be used in combination with the above utility functions.
-// A slice of input result channels of a specific type is provided, along with a function to map those values to another type
-// A slice of output result channels is returned, where each value is mapped as it comes in.
-// The order of the slice will be retained.
-func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChanels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) {
- rawOutputChannels := make([]<-chan OutputResultType, len(inputChanels))
-
- transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) {
- for iv := range ic {
- oc <- mappingFn(iv)
- }
- close(oc)
- }
-
- for ci, c := range inputChanels {
- roc := make(chan OutputResultType)
- go transformingFn(c, roc)
- rawOutputChannels[ci] = roc
- }
-
- outputChannels = rawOutputChannels
- return
-}
diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go
deleted file mode 100644
index fedd74be..00000000
--- a/pkg/concurrency/concurrency_test.go
+++ /dev/null
@@ -1,101 +0,0 @@
-package concurrency_test
-
-// TODO: noramlly, these go in utils_tests, right? Why does this cause problems only in pkg/utils?
-
-import (
- "fmt"
- "slices"
-
- . "github.com/go-skynet/LocalAI/pkg/concurrency"
-
- . "github.com/onsi/ginkgo/v2"
- . "github.com/onsi/gomega"
-)
-
-var _ = Describe("utils/concurrency tests", func() {
- It("SliceOfChannelsReducer works", func() {
- individualResultsChannels := []<-chan int{}
- initialValue := 0
- for i := 0; i < 3; i++ {
- c := make(chan int)
- go func(i int, c chan int) {
- for ii := 1; ii < 4; ii++ {
- c <- (i * ii)
- }
- close(c)
- }(i, c)
- individualResultsChannels = append(individualResultsChannels, c)
- }
- Expect(len(individualResultsChannels)).To(Equal(3))
- finalResultChannel := make(chan int)
- wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int {
- return val + input
- }, initialValue, true)
-
- Expect(wg).ToNot(BeNil())
-
- result := <-finalResultChannel
-
- Expect(result).ToNot(Equal(0))
- Expect(result).To(Equal(18))
- })
-
- It("SliceOfChannelsRawMergerWithoutMapping works", func() {
- individualResultsChannels := []<-chan int{}
- for i := 0; i < 3; i++ {
- c := make(chan int)
- go func(i int, c chan int) {
- for ii := 1; ii < 4; ii++ {
- c <- (i * ii)
- }
- close(c)
- }(i, c)
- individualResultsChannels = append(individualResultsChannels, c)
- }
- Expect(len(individualResultsChannels)).To(Equal(3))
- outputChannel := make(chan int)
- wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true)
- Expect(wg).ToNot(BeNil())
- outputSlice := []int{}
- for v := range outputChannel {
- outputSlice = append(outputSlice, v)
- }
- Expect(len(outputSlice)).To(Equal(9))
- slices.Sort(outputSlice)
- Expect(outputSlice[0]).To(BeZero())
- Expect(outputSlice[3]).To(Equal(1))
- Expect(outputSlice[8]).To(Equal(6))
- })
-
- It("SliceOfChannelsTransformer works", func() {
- individualResultsChannels := []<-chan int{}
- for i := 0; i < 3; i++ {
- c := make(chan int)
- go func(i int, c chan int) {
- for ii := 1; ii < 4; ii++ {
- c <- (i * ii)
- }
- close(c)
- }(i, c)
- individualResultsChannels = append(individualResultsChannels, c)
- }
- Expect(len(individualResultsChannels)).To(Equal(3))
- mappingFn := func(i int) string {
- return fmt.Sprintf("$%d", i)
- }
-
- outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn)
- Expect(len(outputChannels)).To(Equal(3))
- rSlice := []string{}
- for ii := 1; ii < 4; ii++ {
- for i := 0; i < 3; i++ {
- res := <-outputChannels[i]
- rSlice = append(rSlice, res)
- }
- }
- slices.Sort(rSlice)
- Expect(rSlice[0]).To(Equal("$0"))
- Expect(rSlice[3]).To(Equal("$1"))
- Expect(rSlice[8]).To(Equal("$6"))
- })
-})
diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go
deleted file mode 100644
index 76081ba3..00000000
--- a/pkg/concurrency/types.go
+++ /dev/null
@@ -1,6 +0,0 @@
-package concurrency
-
-type ErrorOr[T any] struct {
- Value T
- Error error
-}
diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go
index 49a6b1bd..8fb8c39d 100644
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -41,7 +41,7 @@ type Backend interface {
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
- AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error)
+ AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)
diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go
index c0b4bc34..0af5d94f 100644
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
return fmt.Errorf("unimplemented")
}
-func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) {
- return schema.TranscriptionResult{}, fmt.Errorf("unimplemented")
+func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) {
+ return schema.Result{}, fmt.Errorf("unimplemented")
}
func (llm *Base) TTS(*pb.TTSRequest) error {
diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go
index 0e0e56c7..882db12a 100644
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
return client.TTS(ctx, in, opts...)
}
-func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
+func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
@@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
if err != nil {
return nil, err
}
- tresult := &schema.TranscriptionResult{}
+ tresult := &schema.Result{}
for _, s := range res.Segments {
tks := []int{}
for _, t := range s.Tokens {
diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go
index b4ba4884..73b185a3 100644
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
return e.s.TTS(ctx, in)
}
-func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
+func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
r, err := e.s.AudioTranscription(ctx, in)
if err != nil {
return nil, err
}
- tr := &schema.TranscriptionResult{}
+ tr := &schema.Result{}
for _, s := range r.Segments {
var tks []int
for _, t := range s.Tokens {
diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go
index aa7a3fbc..4d06544d 100644
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -15,7 +15,7 @@ type LLM interface {
Load(*pb.ModelOptions) error
Embeddings(*pb.PredictOptions) ([]float32, error)
GenerateImage(*pb.GenerateImageRequest) error
- AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
+ AudioTranscription(*pb.TranscriptRequest) (schema.Result, error)
TTS(*pb.TTSRequest) error
TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
Status() (pb.StatusResponse, error)
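The rename above means every backend implementation now returns schema.Result. A sketch of a backend satisfying the updated LLM interface, assuming the import paths used in this series and that embedding base.Base supplies the remaining "unimplemented" defaults:

    package mybackend

    import (
        "github.com/go-skynet/LocalAI/core/schema"
        "github.com/go-skynet/LocalAI/pkg/grpc/base"
        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    // Backend embeds base.Base for the default stubs and overrides only
    // AudioTranscription with the renamed result type.
    type Backend struct {
        base.Base
    }

    func (b *Backend) AudioTranscription(req *pb.TranscriptRequest) (schema.Result, error) {
        // A real backend would populate the transcription here; the zero
        // value mirrors the stub in pkg/grpc/base/base.go above.
        return schema.Result{}, nil
    }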
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 617d8f62..5d9808a4 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
if _, err := os.Stat(uri); err == nil {
serverAddress, err := getFreeAddress()
if err != nil {
- return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error())
+ return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
}
// Make sure the process is executable
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
@@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
if !ready {
log.Debug().Msgf("GRPC Service NOT ready")
- return "", fmt.Errorf("%s grpc service not ready", backend)
+ return "", fmt.Errorf("grpc service not ready")
}
options := *o.gRPCOptions
@@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
if err != nil {
- return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err)
+ return "", fmt.Errorf("could not load model: %w", err)
}
if !res.Success {
- return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message)
+ return "", fmt.Errorf("could not load model (no success): %s", res.Message)
}
return client, nil
diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go
new file mode 100644
index 00000000..b09516a7
--- /dev/null
+++ b/pkg/startup/model_preload.go
@@ -0,0 +1,85 @@
+package startup
+
+import (
+ "errors"
+ "os"
+ "path/filepath"
+
+ "github.com/go-skynet/LocalAI/embedded"
+ "github.com/go-skynet/LocalAI/pkg/downloader"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
+)
+
+// PreloadModelsConfigurations will preload models from the given list of URLs
+// It will download the model if it is not already present in the model path
+// It will also try to resolve whether the model is an embedded model YAML configuration
+func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) {
+ for _, url := range models {
+
+ // As a best effort, try to resolve the model from the remote library
+ // if it's not resolved, we try the other methods below
+ if modelLibraryURL != "" {
+ lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+ if err == nil {
+ if lib[url] != "" {
+ log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+ url = lib[url]
+ }
+ }
+ }
+
+ url = embedded.ModelShortURL(url)
+ switch {
+ case embedded.ExistsInModelsLibrary(url):
+ modelYAML, err := embedded.ResolveContent(url)
+ // If we resolve something, just save it to disk and continue
+ if err != nil {
+ log.Error().Err(err).Msg("error resolving model content")
+ continue
+ }
+
+ log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+ md5Name := utils.MD5(url)
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+ }
+ case downloader.LooksLikeURL(url):
+ log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+ // md5 of model name
+ md5Name := utils.MD5(url)
+
+ // check if file exists
+ if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+ utils.DisplayDownloadFunction(fileName, current, total, percent)
+ })
+ if err != nil {
+ log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+ }
+ }
+ default:
+ if _, err := os.Stat(url); err == nil {
+ log.Debug().Msgf("[startup] resolved local model: %s", url)
+ // copy to modelPath
+ md5Name := utils.MD5(url)
+
+ modelYAML, err := os.ReadFile(url)
+ if err != nil {
+ log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+ continue
+ }
+
+ modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+ if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+ log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+ }
+ } else {
+ log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+ }
+ }
+ }
+}
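The three switch branches above map to three kinds of arguments the caller can mix freely. A hedged usage sketch — the model name, URL and local path below are hypothetical:

    package main

    import pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"

    func main() {
        // Each argument exercises one branch: an embedded short name,
        // a downloadable URL, and a local YAML file copied into place.
        pkgStartup.PreloadModelsConfigurations(
            "",        // no remote model library
            "/models", // model path to stage definitions into
            "phi-2",                          // hypothetical embedded name
            "https://example.com/model.yaml", // hypothetical URL
            "/tmp/my-model.yaml",             // hypothetical local file
        )
    }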
diff --git a/core/services/model_preload_test.go b/pkg/startup/model_preload_test.go
similarity index 96%
rename from core/services/model_preload_test.go
rename to pkg/startup/model_preload_test.go
index fc65d565..63a8f8b0 100644
--- a/core/services/model_preload_test.go
+++ b/pkg/startup/model_preload_test.go
@@ -1,14 +1,13 @@
-package services_test
+package startup_test
import (
"fmt"
"os"
"path/filepath"
+ . "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/go-skynet/LocalAI/pkg/utils"
- . "github.com/go-skynet/LocalAI/core/services"
-
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go
deleted file mode 100644
index 769d8a88..00000000
--- a/pkg/utils/base64.go
+++ /dev/null
@@ -1,50 +0,0 @@
-package utils
-
-import (
- "encoding/base64"
- "fmt"
- "io"
- "net/http"
- "strings"
- "time"
-)
-
-var base64DownloadClient http.Client = http.Client{
- Timeout: 30 * time.Second,
-}
-
-// this function check if the string is an URL, if it's an URL downloads the image in memory
-// encodes it in base64 and returns the base64 string
-
-// This may look weird down in pkg/utils while it is currently only used in core/config
-//
-// but I believe it may be useful for MQTT as well in the near future, so I'm
-// extracting it while I'm thinking of it.
-func GetImageURLAsBase64(s string) (string, error) {
- if strings.HasPrefix(s, "http") {
- // download the image
- resp, err := base64DownloadClient.Get(s)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- // read the image data into memory
- data, err := io.ReadAll(resp.Body)
- if err != nil {
- return "", err
- }
-
- // encode the image data in base64
- encoded := base64.StdEncoding.EncodeToString(data)
-
- // return the base64 string
- return encoded, nil
- }
-
- // if the string instead is prefixed with "data:image/jpeg;base64,", drop it
- if strings.HasPrefix(s, "data:image/jpeg;base64,") {
- return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
- }
- return "", fmt.Errorf("not valid string")
-}
From e9f090257c57181ffd411052e6b818ff6f09550f Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Wed, 17 Apr 2024 20:59:05 -0500
Subject: [PATCH 0175/2750] fix: adjust some source names to match the naming
of their repositories (#2061)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
Makefile | 60 ++++++++++++++++++++++++++++----------------------------
1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/Makefile b/Makefile
index 6715e91e..d236f860 100644
--- a/Makefile
+++ b/Makefile
@@ -179,20 +179,20 @@ endif
all: help
## BERT embeddings
-sources/go-bert:
- git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
- cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert.cpp:
+ git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
+ cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
-sources/go-bert/libgobert.a: sources/go-bert
- $(MAKE) -C sources/go-bert libgobert.a
+sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
+ $(MAKE) -C sources/go-bert.cpp libgobert.a
-## go-llama-ggml
-sources/go-llama-ggml:
- git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
- cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+## go-llama.cpp
+sources/go-llama.cpp:
+ git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
+ cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
-sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
- $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
+ $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
@@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
-sources/go-rwkv:
- git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
- cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-rwkv.cpp:
+ git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
+ cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
-sources/go-rwkv/librwkv.a: sources/go-rwkv
- cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
+sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
+ cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
@@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
## whisper
sources/whisper.cpp:
- git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
+ git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && make libwhisper.a
-get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace:
- $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
+ $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
- $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
+ $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
+ $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
@@ -271,12 +272,12 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
- $(MAKE) -C sources/go-llama-ggml clean
+ $(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
- $(MAKE) -C sources/go-rwkv clean
+ $(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
- $(MAKE) -C sources/go-bert clean
+ $(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
@@ -598,8 +599,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
-backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
+backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
@@ -641,17 +642,16 @@ ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
-backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
- $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
+backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
-backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
- CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
+backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
From 502c1eedaa61ae742bfd6eb2e074e6f1180c2c66 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Wed, 17 Apr 2024 22:21:55 -0500
Subject: [PATCH 0176/2750] feat: refactor the dynamic json configs for
api_keys and external_backends (#2055)
* feat: refactor the dynamic json configs for api_keys and external_backends
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: remove commented code
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---------
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
core/cli/run.go | 13 +--
core/config/application_config.go | 7 ++
core/startup/config_file_watcher.go | 154 +++++++++++++++++++---------
core/startup/startup.go | 5 +
4 files changed, 117 insertions(+), 62 deletions(-)
diff --git a/core/cli/run.go b/core/cli/run.go
index 0f3ba2de..d729f946 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -2,7 +2,6 @@ package cli
import (
"fmt"
- "os"
"strings"
"time"
@@ -65,6 +64,7 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithAudioDir(r.AudioPath),
config.WithUploadDir(r.UploadPath),
config.WithConfigsDir(r.ConfigPath),
+ config.WithDynamicConfigDir(r.LocalaiConfigDir),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
@@ -134,17 +134,6 @@ func (r *RunCMD) Run(ctx *Context) error {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
- // Watch the configuration directory
- // If the directory does not exist, we don't watch it
- if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
- closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
- defer closeConfigWatcherFn()
-
- if err != nil {
- return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir)
- }
- }
-
appHTTP, err := http.App(cl, ml, options)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 9525553a..77817616 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -22,6 +22,7 @@ type ApplicationConfig struct {
AudioDir string
UploadDir string
ConfigsDir string
+ DynamicConfigsDir string
CORS bool
PreloadJSONModels string
PreloadModelsFromPath string
@@ -264,6 +265,12 @@ func WithConfigsDir(configsDir string) AppOption {
}
}
+func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
+ return func(o *ApplicationConfig) {
+ o.DynamicConfigsDir = dynamicConfigsDir
+ }
+}
+
func WithApiKeys(apiKeys []string) AppOption {
return func(o *ApplicationConfig) {
o.ApiKeys = apiKeys
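WithDynamicConfigDir follows the same functional-options shape as the rest of ApplicationConfig. A sketch of how a further option would slot in; the option name and the WatchInterval field below are hypothetical, not part of this patch:

    // Hypothetical AppOption, shown only to illustrate the pattern;
    // ApplicationConfig has no WatchInterval field in this series.
    func WithWatchInterval(d time.Duration) AppOption {
        return func(o *ApplicationConfig) {
            o.WatchInterval = d
        }
    }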
diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go
index 9c758e25..5d213df5 100644
--- a/core/startup/config_file_watcher.go
+++ b/core/startup/config_file_watcher.go
@@ -12,89 +12,143 @@ import (
"github.com/rs/zerolog/log"
)
-type WatchConfigDirectoryCloser func() error
+type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
-func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error {
- fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json"))
- if err == nil {
- // Parse JSON content from the file
- var fileKeys []string
- err := json.Unmarshal(fileContent, &fileKeys)
- if err == nil {
- appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
- return nil
- }
- return err
- }
- return err
+type configFileHandler struct {
+ handlers map[string]fileHandler
+
+ watcher *fsnotify.Watcher
+
+ configDir string
+ appConfig *config.ApplicationConfig
}
-func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
- fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
- if err != nil {
- return err
+// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
+// then we can export it to other packages
+func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
+ c := configFileHandler{
+ handlers: make(map[string]fileHandler),
+ configDir: appConfig.DynamicConfigsDir,
+ appConfig: appConfig,
}
- // Parse JSON content from the file
- var fileBackends map[string]string
- err = json.Unmarshal(fileContent, &fileBackends)
- if err != nil {
- return err
+ c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
+ c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
+ return c
+}
+
+func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
+ _, ok := c.handlers[filename]
+ if ok {
+ return fmt.Errorf("handler already registered for file %s", filename)
}
- err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
- if err != nil {
- return err
+ c.handlers[filename] = handler
+ if runNow {
+ c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
}
return nil
}
-var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
- "api_keys.json": ReadApiKeysJson,
- "external_backends.json": ReadExternalBackendsJson,
+func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
+ fileContent, err := os.ReadFile(filename)
+ if err != nil && !os.IsNotExist(err) {
+ log.Error().Err(err).Str("filename", filename).Msg("could not read file")
+ }
+
+ if err = handler(fileContent, c.appConfig); err != nil {
+ log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
+ }
}
-func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
- if len(configDir) == 0 {
- return nil, fmt.Errorf("configDir blank")
- }
+func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
+ c.watcher = configWatcher
if err != nil {
- log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
- }
- ret := func() error {
- configWatcher.Close()
- return nil
+ log.Fatal().Err(err).Str("configdir", c.configDir).Msg("unable to create a watcher for configuration directory")
}
// Start listening for events.
go func() {
for {
select {
- case event, ok := <-configWatcher.Events:
+ case event, ok := <-c.watcher.Events:
if !ok {
return
}
- if event.Has(fsnotify.Write) {
- for targetName, watchFn := range CONFIG_FILE_UPDATES {
- if event.Name == targetName {
- err := watchFn(configDir, appConfig)
- log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
- }
+ if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
+ handler, ok := c.handlers[path.Base(event.Name)]
+ if !ok {
+ continue
}
+
+ c.callHandler(event.Name, handler)
}
- case _, ok := <-configWatcher.Errors:
+ case err, ok := <-c.watcher.Errors:
+ log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
- log.Error().Err(err).Msg("error encountered while watching config directory")
}
}
}()
// Add a path.
- err = configWatcher.Add(configDir)
+ err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
- return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
+ return fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
}
- return ret, nil
+ return nil
+}
+
+// TODO: When we institute graceful shutdown, this should be called
+func (c *configFileHandler) Stop() {
+ c.watcher.Close()
+}
+
+func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
+ handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+ log.Debug().Msg("processing api_keys.json")
+
+ if len(fileContent) > 0 {
+ // Parse JSON content from the file
+ var fileKeys []string
+ err := json.Unmarshal(fileContent, &fileKeys)
+ if err != nil {
+ return err
+ }
+
+ appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
+ } else {
+ appConfig.ApiKeys = startupAppConfig.ApiKeys
+ }
+ log.Debug().Msg("api keys loaded from api_keys.json")
+ return nil
+ }
+
+ return handler
+}
+
+func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
+ handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+ log.Debug().Msg("processing external_backends.json")
+
+ if len(fileContent) > 0 {
+ // Parse JSON content from the file
+ var fileBackends map[string]string
+ err := json.Unmarshal(fileContent, &fileBackends)
+ if err != nil {
+ return err
+ }
+ appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
+ err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
+ if err != nil {
+ return err
+ }
+ } else {
+ appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
+ }
+ log.Debug().Msg("external backends loaded from external_backends.json")
+ return nil
+ }
+ return handler
}
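The handler table makes adding a dynamic config file a one-line registration. A sketch, placed inside core/startup since configFileHandler is unexported; the file name below is hypothetical:

    // Register a hypothetical runtime_settings.json alongside the two
    // built-in handlers; runNow=true also applies it once at startup.
    func registerRuntimeSettings(c *configFileHandler) error {
        return c.Register("runtime_settings.json", func(fileContent []byte, appConfig *config.ApplicationConfig) error {
            if len(fileContent) == 0 {
                // File absent or deleted: keep the startup values.
                return nil
            }
            // ... unmarshal fileContent and apply it to appConfig ...
            return nil
        }, true)
    }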
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 6298f034..af92f0e1 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -125,6 +125,11 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
}()
}
+ // Watch the configuration directory
+ // If the directory does not exist, we don't watch it
+ configHandler := newConfigFileHandler(options)
+ configHandler.Watch()
+
log.Info().Msg("core/startup process completed!")
return cl, ml, options, nil
}
From f9c75d487851749d3b382f64bb3d8a9bf52d94dd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 18 Apr 2024 10:57:24 +0200
Subject: [PATCH 0177/2750] tests: add template tests (#2063)
Signed-off-by: Ettore Di Giacinto
---
pkg/model/loader_test.go | 105 ++++++++++++++++++++++++++++++++++
pkg/model/model_suite_test.go | 13 +++++
2 files changed, 118 insertions(+)
create mode 100644 pkg/model/loader_test.go
create mode 100644 pkg/model/model_suite_test.go
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
new file mode 100644
index 00000000..4c3c1a88
--- /dev/null
+++ b/pkg/model/loader_test.go
@@ -0,0 +1,105 @@
+package model_test
+
+import (
+ "github.com/go-skynet/LocalAI/pkg/model"
+ . "github.com/go-skynet/LocalAI/pkg/model"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+{{- if .FunctionCall }}
+
+{{- else if eq .RoleName "tool" }}
+
+{{- end }}
+{{- if .Content}}
+{{.Content }}
+{{- end }}
+{{- if .FunctionCall}}
+{{toJson .FunctionCall}}
+{{- end }}
+{{- if .FunctionCall }}
+
+{{- else if eq .RoleName "tool" }}
+
+{{- end }}
+<|im_end|>`
+
+var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+ "user": {
+ "template": chatML,
+ "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "user",
+ RoleName: "user",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "assistant": {
+ "template": chatML,
+ "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_call": {
+ "template": chatML,
+ "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n \n<|im_end|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "",
+ FunctionCall: map[string]string{"function": "test"},
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_response": {
+ "template": chatML,
+ "expected": "<|im_start|>tool\n\nResponse from tool\n \n<|im_end|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "tool",
+ RoleName: "tool",
+ Content: "Response from tool",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+}
+
+var _ = Describe("Templates", func() {
+ Context("chat message", func() {
+ modelLoader := NewModelLoader("")
+ for key := range testMatch {
+ foo := testMatch[key]
+ It("renders correctly "+key, func() {
+ templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
+ Expect(err).ToNot(HaveOccurred())
+ Expect(templated).To(Equal(foo["expected"]), templated)
+ })
+ }
+ })
+})
diff --git a/pkg/model/model_suite_test.go b/pkg/model/model_suite_test.go
new file mode 100644
index 00000000..6fa9c004
--- /dev/null
+++ b/pkg/model/model_suite_test.go
@@ -0,0 +1,13 @@
+package model_test
+
+import (
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+func TestModel(t *testing.T) {
+ RegisterFailHandler(Fail)
+ RunSpecs(t, "LocalAI model test")
+}
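Outside the suite, the same entry point renders one message at a time. A minimal sketch using a trimmed-down template (the full ChatML template is in the test above):

    package main

    import (
        "fmt"
        "log"

        "github.com/go-skynet/LocalAI/pkg/model"
    )

    func main() {
        ml := model.NewModelLoader("")
        tmpl := "<|im_start|>{{.RoleName}}\n{{.Content}}<|im_end|>"
        out, err := ml.EvaluateTemplateForChatMessage(tmpl, model.ChatMessageTemplateData{
            Role:     "user",
            RoleName: "user",
            Content:  "Hello!",
        })
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(out) // <|im_start|>user\nHello!<|im_end|>
    }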
From 8f2681f90420b4818ee270b4ad7c570ed462b09c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 18 Apr 2024 17:17:33 +0000
Subject: [PATCH 0178/2750] build(deps): bump aiohttp from 3.9.2 to 3.9.4 in
/examples/langchain/langchainpy-localai-example in the pip group across 1
directory (#2067)
build(deps): bump aiohttp
Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [aiohttp](https://github.com/aio-libs/aiohttp).
Updates `aiohttp` from 3.9.2 to 3.9.4
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.2...v3.9.4)
---
updated-dependencies:
- dependency-name: aiohttp
dependency-type: direct:production
dependency-group: pip
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 1e63b0bf..ba7f8429 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,4 +1,4 @@
-aiohttp==3.9.2
+aiohttp==3.9.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
From 13012cfa70d8440a78d3a9c88500597c8cc8ed98 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:19:36 -0500
Subject: [PATCH 0179/2750] feat: better control of GRPC docker cache (#2070)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.github/workflows/generate_grpc_cache.yaml | 90 ++++++++++++++++++++++
.github/workflows/image-pr.yml | 9 ++-
.github/workflows/image.yml | 22 ++++--
.github/workflows/image_build.yml | 15 ++--
Dockerfile | 3 +-
5 files changed, 126 insertions(+), 13 deletions(-)
create mode 100644 .github/workflows/generate_grpc_cache.yaml
diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml
new file mode 100644
index 00000000..11abc10a
--- /dev/null
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -0,0 +1,90 @@
+name: 'generate and publish GRPC docker caches'
+
+on:
+- workflow_dispatch
+
+concurrency:
+ group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+ cancel-in-progress: true
+
+jobs:
+ generate_caches:
+ strategy:
+ matrix:
+ include:
+ - grpc-base-image: ubuntu:22.04
+ runs-on: 'ubuntu-latest'
+ platforms: 'linux/amd64'
+ runs-on: ${{matrix.runs-on}}
+ steps:
+ - name: Release space from worker
+ if: matrix.runs-on == 'ubuntu-latest'
+ run: |
+ echo "Listing top largest packages"
+ pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+ head -n 30 <<< "${pkgs}"
+ echo
+ df -h
+ echo
+ sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+ sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+ sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+ sudo rm -rf /usr/local/lib/android
+ sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+ sudo rm -rf /usr/share/dotnet
+ sudo apt-get remove -y '^mono-.*' || true
+ sudo apt-get remove -y '^ghc-.*' || true
+ sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+ sudo apt-get remove -y 'php.*' || true
+ sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+ sudo apt-get remove -y '^google-.*' || true
+ sudo apt-get remove -y azure-cli || true
+ sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+ sudo apt-get remove -y '^gfortran-.*' || true
+ sudo apt-get remove -y microsoft-edge-stable || true
+ sudo apt-get remove -y firefox || true
+ sudo apt-get remove -y powershell || true
+ sudo apt-get remove -y r-base-core || true
+ sudo apt-get autoremove -y
+ sudo apt-get clean
+ echo
+ echo "Listing top largest packages"
+ pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+ head -n 30 <<< "${pkgs}"
+ echo
+ sudo rm -rfv build || true
+ sudo rm -rf /usr/share/dotnet || true
+ sudo rm -rf /opt/ghc || true
+ sudo rm -rf "/usr/local/share/boost" || true
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+ df -h
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@master
+ with:
+ platforms: all
+
+ - name: Set up Docker Buildx
+ id: buildx
+ uses: docker/setup-buildx-action@master
+
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Cache GRPC
+ uses: docker/build-push-action@v5
+ with:
+ builder: ${{ steps.buildx.outputs.name }}
+ # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
+ # This means that even the MAKEFLAGS have to be an EXACT match.
+ # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
+ build-args: |
+ GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
+ MAKEFLAGS=--jobs=4 --output-sync=target
+ GRPC_VERSION=v1.58.0
+ context: .
+ file: ./Dockerfile
+ cache-to: type=gha,ignore-error=true
+ target: grpc
+ platforms: ${{ matrix.platforms }}
+ push: false
\ No newline at end of file
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index b703b16d..9c4fece7 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,6 +22,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -61,12 +62,14 @@ jobs:
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -85,6 +88,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -102,11 +106,12 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -122,4 +127,4 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
- makeflags: "--jobs=5 --output-sync=target"
\ No newline at end of file
+ makeflags: "--jobs=4 --output-sync=target"
\ No newline at end of file
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index d2607579..255c1c65 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -26,6 +26,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
+ grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
@@ -129,6 +130,7 @@ jobs:
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
@@ -140,12 +142,14 @@ jobs:
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -158,6 +162,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -171,6 +176,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
@@ -180,6 +186,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
@@ -189,6 +196,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -198,6 +206,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+ grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -210,6 +219,7 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
@@ -219,6 +229,7 @@ jobs:
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+ grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
@@ -236,6 +247,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
+ grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
@@ -258,7 +270,7 @@ jobs:
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -269,7 +281,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -280,7 +292,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -291,7 +303,7 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -302,4 +314,4 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
- makeflags: "--jobs=5 --output-sync=target"
+ makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index b0684a4c..b06100ff 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -6,6 +6,10 @@ on:
inputs:
base-image:
description: 'Base image'
+ required: true
+ type: string
+ grpc-base-image:
+ description: 'GRPC Base image, must be an image compatible with base-image'
required: false
default: ''
type: string
@@ -57,7 +61,7 @@ on:
makeflags:
description: 'Make Flags'
required: false
- default: '--jobs=3 --output-sync=target'
+ default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
@@ -201,15 +205,16 @@ jobs:
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
+ # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
+ # This means that even the MAKEFLAGS have to be an EXACT match.
+ # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
build-args: |
- IMAGE_TYPE=${{ inputs.image-type }}
- BASE_IMAGE=${{ inputs.base-image }}
- MAKEFLAGS=${{ inputs.makeflags }}
+ GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
+ MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.58.0
context: .
file: ./Dockerfile
cache-from: type=gha
- cache-to: type=gha,ignore-error=true
target: grpc
platforms: ${{ inputs.platforms }}
push: false
diff --git a/Dockerfile b/Dockerfile
index 397fbe22..805ac3a6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,6 @@
ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04
+ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
# extras or core
FROM ${BASE_IMAGE} as requirements-core
@@ -104,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \
###################################
###################################
-FROM ${BASE_IMAGE} as grpc
+FROM ${GRPC_BASE_IMAGE} as grpc
ARG MAKEFLAGS
ARG GRPC_VERSION=v1.58.0
From bbea62b907db917b8ad7036d06b828da48269bf8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 18 Apr 2024 22:43:12 +0200
Subject: [PATCH 0180/2750] feat(functions): support models with no grammar,
add tests (#2068)
Signed-off-by: Ettore Di Giacinto
---
core/config/backend_config.go | 10 +-
core/http/endpoints/openai/chat.go | 131 ++++++------------
core/http/endpoints/openai/completion.go | 4 +-
core/http/endpoints/openai/request.go | 4 +-
core/schema/openai.go | 14 +-
pkg/{grammar => functions}/functions.go | 2 +-
.../functions_suite_test.go} | 2 +-
pkg/{grammar => functions}/functions_test.go | 4 +-
.../grammar_json_schema.go} | 2 +-
.../grammar_json_schema_test.go} | 4 +-
pkg/functions/parse.go | 108 +++++++++++++++
pkg/functions/parse_test.go | 85 ++++++++++++
pkg/model/loader.go | 4 +-
13 files changed, 255 insertions(+), 119 deletions(-)
rename pkg/{grammar => functions}/functions.go (98%)
rename pkg/{grammar/grammar_suite_test.go => functions/functions_suite_test.go} (90%)
rename pkg/{grammar => functions}/functions_test.go (96%)
rename pkg/{grammar/json_schema.go => functions/grammar_json_schema.go} (99%)
rename pkg/{grammar/json_schema_test.go => functions/grammar_json_schema_test.go} (98%)
create mode 100644 pkg/functions/parse.go
create mode 100644 pkg/functions/parse_test.go
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 81c92d01..1161cf9f 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -12,6 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/downloader"
+ "github.com/go-skynet/LocalAI/pkg/functions"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
@@ -39,7 +40,7 @@ type BackendConfig struct {
InputToken [][]int `yaml:"-"`
functionCallString, functionCallNameString string `yaml:"-"`
- FunctionsConfig Functions `yaml:"function"`
+ FunctionsConfig functions.FunctionsConfig `yaml:"function"`
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
@@ -157,13 +158,6 @@ type AutoGPTQ struct {
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}
-type Functions struct {
- DisableNoAction bool `yaml:"disable_no_action"`
- NoActionFunctionName string `yaml:"no_action_function_name"`
- NoActionDescriptionName string `yaml:"no_action_description_name"`
- ParallelCalls bool `yaml:"parallel_calls"`
-}
-
type TemplateConfig struct {
Chat string `yaml:"chat"`
ChatMessage string `yaml:"chat_message"`
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 36d1142b..9adba8ea 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -11,9 +11,8 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
- "github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
@@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
return true
})
- results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
- noActionToRun := len(results) > 0 && results[0].name == noAction
+ results := functions.ParseFunctionCall(result, config.FunctionsConfig)
+ noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
switch {
case noActionToRun:
@@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
responses <- initialMessage
- result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
+ result, err := handleQuestion(config, req, ml, startupOptions, results, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
default:
for i, ss := range results {
- name, args := ss.name, ss.arguments
+ name, args := ss.Name, ss.Arguments
initialMessage := schema.OpenAIResponse{
ID: id,
@@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
- processFunctions := false
- funcs := grammar.Functions{}
modelFile, input, err := readRequest(c, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
@@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
log.Debug().Msgf("Configuration read: %+v", config)
+ funcs := input.Functions
+ shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
+
// Allow the user to set custom actions via config file
// to be "embedded" in each model
noActionName := "answer"
@@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
if input.ResponseFormat.Type == "json_object" {
- input.Grammar = grammar.JSONBNF
+ input.Grammar = functions.JSONBNF
}
config.Grammar = input.Grammar
- // process functions if we have any defined or if we have a function call string
- if len(input.Functions) > 0 && config.ShouldUseFunctions() {
+ if shouldUseFn {
log.Debug().Msgf("Response needs to process functions")
+ }
- processFunctions = true
-
- noActionGrammar := grammar.Function{
+ switch {
+ case !config.FunctionsConfig.NoGrammar && shouldUseFn:
+ noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
@@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
// Append the no action function
- funcs = append(funcs, input.Functions...)
if !config.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
@@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure()
config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
- } else if input.JSONFunctionGrammarObject != nil {
+ case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
+ default:
+ // Force picking one of the functions by the request
+ if config.FunctionToCall() != "" {
+ funcs = funcs.Select(config.FunctionToCall())
+ }
}
+ // process functions if we have any defined or if we have a function call string
+
// functions are not supported in stream mode (yet?)
toStream := input.Stream
@@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
- if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
-
+ if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
@@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
templateFile = config.Model
}
- if config.TemplateConfig.Chat != "" && !processFunctions {
+ if config.TemplateConfig.Chat != "" && !shouldUseFn {
templateFile = config.TemplateConfig.Chat
}
- if config.TemplateConfig.Functions != "" && processFunctions {
+ if config.TemplateConfig.Functions != "" && shouldUseFn {
templateFile = config.TemplateConfig.Functions
}
@@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)
- if processFunctions {
+ if shouldUseFn && config.Grammar != "" {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
}
@@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
responses := make(chan schema.OpenAIResponse)
- if !processFunctions {
+ if !shouldUseFn {
go process(predInput, input, config, ml, responses)
} else {
go processTools(noActionName, predInput, input, config, ml, responses)
@@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// no streaming mode
default:
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
- if !processFunctions {
+ if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
- results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
- noActionsToRun := len(results) > 0 && results[0].name == noActionName
+ results := functions.ParseFunctionCall(s, config.FunctionsConfig)
+ noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
switch {
case noActionsToRun:
- result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
+ result, err := handleQuestion(config, input, ml, startupOptions, results, predInput)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
for _, ss := range results {
- name, args := ss.name, ss.arguments
+ name, args := ss.Name, ss.Arguments
if len(input.Tools) > 0 {
// If we are using tools, we condense the function calls into
// a single response choice with all the tools
@@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Return the prediction in the response body
return c.JSON(resp)
}
-
}
}
-func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
+func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) {
log.Debug().Msgf("nothing to do, computing a reply")
-
+ arg := ""
+ if len(funcResults) > 0 {
+ arg = funcResults[0].Arguments
+ }
// If there is a message that the LLM already sends as part of the JSON reply, use it
arguments := map[string]interface{}{}
- json.Unmarshal([]byte(args), &arguments)
+ if err := json.Unmarshal([]byte(arg), &arguments); err != nil {
+ log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object")
+ }
m, exists := arguments["message"]
if exists {
switch message := m.(type) {
@@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
}
return backend.Finetune(*config, prompt, prediction.Response), nil
}
-
-type funcCallResults struct {
- name string
- arguments string
-}
-
-func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
- results := []funcCallResults{}
-
- // TODO: use generics to avoid this code duplication
- if multipleResults {
- ss := []map[string]interface{}{}
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
- log.Debug().Msgf("Function return: %s %+v", s, ss)
-
- for _, s := range ss {
- func_name, ok := s["function"]
- if !ok {
- continue
- }
- args, ok := s["arguments"]
- if !ok {
- continue
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- continue
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
- } else {
- // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
- ss := map[string]interface{}{}
- // This prevent newlines to break JSON parsing for clients
- s := utils.EscapeNewLines(llmresult)
- json.Unmarshal([]byte(s), &ss)
- log.Debug().Msgf("Function return: %s %+v", s, ss)
-
- // The grammar defines the function name as "function", while OpenAI returns "name"
- func_name, ok := ss["function"]
- if !ok {
- return results
- }
- // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
- args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
- if !ok {
- return results
- }
- d, _ := json.Marshal(args)
- funcName, ok := func_name.(string)
- if !ok {
- return results
- }
- results = append(results, funcCallResults{name: funcName, arguments: string(d)})
- }
-
- return results
-}
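
In short, the handler now funnels every model response through functions.ParseFunctionCall and treats an empty result set the same as the no-action function. A self-contained Go sketch of that dispatch (names mirror the patch; reply and streaming details elided):

    package main

    import "fmt"

    // FuncCallResults mirrors the struct introduced in pkg/functions below.
    type FuncCallResults struct {
    	Name      string
    	Arguments string
    }

    // dispatch condenses the branching added to chat.go: no parsed calls, or a
    // leading no-action result, means "just answer"; otherwise each parsed
    // call becomes a tool invocation.
    func dispatch(results []FuncCallResults, noActionName string) {
    	if len(results) == 0 || results[0].Name == noActionName {
    		fmt.Println("reply normally (no function to run)")
    		return
    	}
    	for _, r := range results {
    		fmt.Printf("tool call: %s(%s)\n", r.Name, r.Arguments)
    	}
    }

    func main() {
    	dispatch(nil, "answer")
    	dispatch([]FuncCallResults{{Name: "add", Arguments: `{"x":5,"y":3}`}}, "answer")
    }
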
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index 69923475..bcd46db5 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}
if input.ResponseFormat.Type == "json_object" {
- input.Grammar = grammar.JSONBNF
+ input.Grammar = functions.JSONBNF
}
config.Grammar = input.Grammar
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index 369fb0b8..9a107bab 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}
if input.ToolsChoice != nil {
- var toolChoice grammar.Tool
+ var toolChoice functions.Tool
switch content := input.ToolsChoice.(type) {
case string:
diff --git a/core/schema/openai.go b/core/schema/openai.go
index 6aa0f1b0..a251ba68 100644
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -3,7 +3,7 @@ package schema
import (
"context"
- "github.com/go-skynet/LocalAI/pkg/grammar"
+ functions "github.com/go-skynet/LocalAI/pkg/functions"
)
// APIError provides error information returned by the OpenAI API.
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
type OpenAIRequest struct {
PredictionOptions
- Context context.Context `json:"-"`
+ Context context.Context `json:"-"`
Cancel context.CancelFunc `json:"-"`
// whisper
@@ -130,11 +130,11 @@ type OpenAIRequest struct {
Messages []Message `json:"messages" yaml:"messages"`
// A list of available functions to call
- Functions []grammar.Function `json:"functions" yaml:"functions"`
- FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
+ Functions functions.Functions `json:"functions" yaml:"functions"`
+ FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
- Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"`
- ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
+ Tools []functions.Tool `json:"tools,omitempty" yaml:"tools"`
+ ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
Stream bool `json:"stream"`
@@ -145,7 +145,7 @@ type OpenAIRequest struct {
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`
- JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
+ JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
Backend string `json:"backend" yaml:"backend"`
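
One detail worth keeping in mind here: Context and Cancel stay tagged `json:"-"`, so they never reach the wire or the request debug dumps. A minimal demonstration with a hypothetical struct mirroring just those fields:

    package main

    import (
    	"context"
    	"encoding/json"
    	"fmt"
    )

    // request mimics the relevant corner of schema.OpenAIRequest: fields
    // tagged `json:"-"` are skipped entirely during marshalling.
    type request struct {
    	Model   string             `json:"model"`
    	Context context.Context    `json:"-"`
    	Cancel  context.CancelFunc `json:"-"`
    }

    func main() {
    	ctx, cancel := context.WithCancel(context.Background())
    	defer cancel()
    	b, _ := json.Marshal(request{Model: "llama3-8b-instruct", Context: ctx, Cancel: cancel})
    	fmt.Println(string(b)) // {"model":"llama3-8b-instruct"}
    }
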
diff --git a/pkg/grammar/functions.go b/pkg/functions/functions.go
similarity index 98%
rename from pkg/grammar/functions.go
rename to pkg/functions/functions.go
index 1038f5e6..d75a2ee3 100644
--- a/pkg/grammar/functions.go
+++ b/pkg/functions/functions.go
@@ -1,4 +1,4 @@
-package grammar
+package functions
import (
"encoding/json"
diff --git a/pkg/grammar/grammar_suite_test.go b/pkg/functions/functions_suite_test.go
similarity index 90%
rename from pkg/grammar/grammar_suite_test.go
rename to pkg/functions/functions_suite_test.go
index 652643b6..8964b1c8 100644
--- a/pkg/grammar/grammar_suite_test.go
+++ b/pkg/functions/functions_suite_test.go
@@ -1,4 +1,4 @@
-package grammar
+package functions
import (
"testing"
diff --git a/pkg/grammar/functions_test.go b/pkg/functions/functions_test.go
similarity index 96%
rename from pkg/grammar/functions_test.go
rename to pkg/functions/functions_test.go
index 6e8a56ed..97953a5e 100644
--- a/pkg/grammar/functions_test.go
+++ b/pkg/functions/functions_test.go
@@ -1,7 +1,7 @@
-package grammar_test
+package functions_test
import (
- . "github.com/go-skynet/LocalAI/pkg/grammar"
+ . "github.com/go-skynet/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
diff --git a/pkg/grammar/json_schema.go b/pkg/functions/grammar_json_schema.go
similarity index 99%
rename from pkg/grammar/json_schema.go
rename to pkg/functions/grammar_json_schema.go
index 76f9778f..01046390 100644
--- a/pkg/grammar/json_schema.go
+++ b/pkg/functions/grammar_json_schema.go
@@ -1,4 +1,4 @@
-package grammar
+package functions
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
diff --git a/pkg/grammar/json_schema_test.go b/pkg/functions/grammar_json_schema_test.go
similarity index 98%
rename from pkg/grammar/json_schema_test.go
rename to pkg/functions/grammar_json_schema_test.go
index 39d2a4d5..fc9029a8 100644
--- a/pkg/grammar/json_schema_test.go
+++ b/pkg/functions/grammar_json_schema_test.go
@@ -1,9 +1,9 @@
-package grammar_test
+package functions_test
import (
"strings"
- . "github.com/go-skynet/LocalAI/pkg/grammar"
+ . "github.com/go-skynet/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
new file mode 100644
index 00000000..5324e8c6
--- /dev/null
+++ b/pkg/functions/parse.go
@@ -0,0 +1,108 @@
+package functions
+
+import (
+ "encoding/json"
+ "regexp"
+
+ "github.com/go-skynet/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
+)
+
+type FunctionsConfig struct {
+ DisableNoAction bool `yaml:"disable_no_action"`
+ NoActionFunctionName string `yaml:"no_action_function_name"`
+ NoActionDescriptionName string `yaml:"no_action_description_name"`
+ ParallelCalls bool `yaml:"parallel_calls"`
+ NoGrammar bool `yaml:"no_grammar"`
+ ResponseRegex string `yaml:"response_regex"`
+}
+
+type FuncCallResults struct {
+ Name string
+ Arguments string
+}
+
+func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults {
+ multipleResults := functionConfig.ParallelCalls
+ useGrammars := !functionConfig.NoGrammar
+
+ results := []FuncCallResults{}
+
+ // if no grammar is used, we have to extract function and arguments from the result
+ if !useGrammars {
+ // the response is a string that we have to parse
+
+ // We use named regexes here to extract the function name and arguments
+ // obviously, this expects the LLM to be stable and return correctly formatted JSON
+ // TODO: optimize this and pre-compile it
+ var respRegex = regexp.MustCompile(functionConfig.ResponseRegex)
+ match := respRegex.FindStringSubmatch(llmresult)
+ result := make(map[string]string)
+ for i, name := range respRegex.SubexpNames() {
+ if i != 0 && name != "" && len(match) > i {
+ result[name] = match[i]
+ }
+ }
+
+ // TODO: open point about multiple results and/or mixed with chat messages
+ // This is not handled for now; we only expect one function call per response
+ functionName := result["function"]
+ if functionName == "" {
+ return results
+ }
+
+ return append(results, FuncCallResults{Name: result["function"], Arguments: result["arguments"]})
+ }
+
+ // with grammars
+ // TODO: use generics to avoid this code duplication
+ if multipleResults {
+ ss := []map[string]interface{}{}
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ for _, s := range ss {
+ func_name, ok := s["function"]
+ if !ok {
+ continue
+ }
+ args, ok := s["arguments"]
+ if !ok {
+ continue
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ continue
+ }
+ results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
+ }
+ } else {
+ // As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+ ss := map[string]interface{}{}
+ // This prevents newlines from breaking JSON parsing for clients
+ s := utils.EscapeNewLines(llmresult)
+ json.Unmarshal([]byte(s), &ss)
+ log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+ // The grammar defines the function name as "function", while OpenAI returns "name"
+ func_name, ok := ss["function"]
+ if !ok {
+ return results
+ }
+ // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+ args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+ if !ok {
+ return results
+ }
+ d, _ := json.Marshal(args)
+ funcName, ok := func_name.(string)
+ if !ok {
+ return results
+ }
+ results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
+ }
+
+ return results
+}
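
The no-grammar path above leans on Go's named capture groups: regexp.SubexpNames reports each group's name, which is then used to pull `function` and `arguments` out of the match. A standalone sketch of exactly that extraction, using the same regex shape as the test configuration below:

    package main

    import (
    	"fmt"
    	"regexp"
    )

    func main() {
    	// A function name followed by a parenthesized JSON arguments object.
    	respRegex := regexp.MustCompile(`(?P<function>\w+)\s*\((?P<arguments>.*)\)`)

    	match := respRegex.FindStringSubmatch(`add({"x":5,"y":3})`)
    	result := map[string]string{}
    	for i, name := range respRegex.SubexpNames() {
    		if i != 0 && name != "" && len(match) > i {
    			result[name] = match[i]
    		}
    	}
    	fmt.Println(result["function"], result["arguments"]) // add {"x":5,"y":3}
    }
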
diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go
new file mode 100644
index 00000000..5168a7d1
--- /dev/null
+++ b/pkg/functions/parse_test.go
@@ -0,0 +1,85 @@
+package functions_test
+
+import (
+ . "github.com/go-skynet/LocalAI/pkg/functions"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("LocalAI function parse tests", func() {
+ var functionConfig FunctionsConfig
+
+ BeforeEach(func() {
+ // Default configuration setup
+ functionConfig = FunctionsConfig{
+ ParallelCalls: false,
+ NoGrammar: false,
+ ResponseRegex: `(?P<function>\w+)\s*\((?P<arguments>.*)\)`,
+ }
+ })
+
+ Context("when using grammars and single result expected", func() {
+ It("should parse the function name and arguments correctly", func() {
+ input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
+ functionConfig.ParallelCalls = false
+ functionConfig.NoGrammar = false
+
+ results := ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(1))
+ Expect(results[0].Name).To(Equal("add"))
+ Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+ })
+ })
+
+ Context("when not using grammars and regex is needed", func() {
+ It("should extract function name and arguments from the regex", func() {
+ input := `add({"x":5,"y":3})`
+ functionConfig.NoGrammar = true
+
+ results := ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(1))
+ Expect(results[0].Name).To(Equal("add"))
+ Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+ })
+ })
+
+ Context("when having invalid input", func() {
+ It("returns no results when there is no input", func() {
+ input := ""
+ functionConfig.NoGrammar = true
+
+ results := ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(0))
+
+ functionConfig.NoGrammar = false
+
+ results = ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(0))
+ })
+ It("returns no results when is invalid", func() {
+ input := "invalid input"
+ functionConfig.NoGrammar = true
+
+ results := ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(0))
+ functionConfig.NoGrammar = false
+
+ results = ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(0))
+ })
+ })
+ Context("when parallel calls are enabled", func() {
+ It("should handle multiple function calls", func() {
+ input := `[{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]`
+ functionConfig.ParallelCalls = true
+ functionConfig.NoGrammar = false
+
+ results := ParseFunctionCall(input, functionConfig)
+ Expect(results).To(HaveLen(2))
+ Expect(results[0].Name).To(Equal("add"))
+ Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+ Expect(results[1].Name).To(Equal("subtract"))
+ Expect(results[1].Arguments).To(Equal(`{"x":10,"y":7}`))
+ })
+ })
+})
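
A side note on the expected strings in these tests: in the grammar path, arguments are round-tripped through json.Marshal, which emits compact, key-sorted JSON regardless of the spacing the model produced. A quick demonstration:

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    func main() {
    	// Why the tests expect `{"x":5,"y":3}`: unmarshalling and re-marshalling
    	// normalizes whitespace and orders map keys.
    	var parsed map[string]interface{}
    	_ = json.Unmarshal([]byte(`{"x": 5, "y": 3}`), &parsed)
    	b, _ := json.Marshal(parsed)
    	fmt.Println(string(b)) // {"x":5,"y":3}
    }
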
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index 003d8327..f3182940 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -11,7 +11,7 @@ import (
"text/template"
"github.com/Masterminds/sprig/v3"
- grammar "github.com/go-skynet/LocalAI/pkg/grammar"
+ "github.com/go-skynet/LocalAI/pkg/functions"
"github.com/go-skynet/LocalAI/pkg/grpc"
process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
@@ -25,7 +25,7 @@ type PromptTemplateData struct {
SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
Input string
Instruction string
- Functions []grammar.Function
+ Functions []functions.Function
MessageIndex int
}
From e9448005a50bf966248ea34fbc0a63c23a43e4fb Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 18 Apr 2024 23:30:55 +0200
Subject: [PATCH 0181/2750] :arrow_up: Update ggerganov/llama.cpp (#2051)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d236f860..e2bfa594 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
+CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 852316c5a61fa8430299717912a2fd62f23fd572 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 18 Apr 2024 19:52:34 -0500
Subject: [PATCH 0182/2750] fix: move the GRPC cache generation workflow into
its own concurrency group (#2071)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.github/workflows/generate_grpc_cache.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml
index 11abc10a..c6b080b5 100644
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -4,7 +4,7 @@ on:
- workflow_dispatch
concurrency:
- group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+ group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
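
The fix is purely a naming change, but the semantics matter: runs that share a concurrency group key cancel each other when cancel-in-progress is set, so the cache generation job was being cancelled by unrelated CI runs on the same ref. A rough Go model of that keying behavior (hypothetical, for illustration only):

    package main

    import (
    	"context"
    	"fmt"
    	"sync"
    	"time"
    )

    // runGroup mimics a GitHub Actions concurrency group with
    // cancel-in-progress: starting a run for a key cancels the previous one.
    type runGroup struct {
    	mu     sync.Mutex
    	active map[string]context.CancelFunc
    }

    func (g *runGroup) start(key string, job func(ctx context.Context)) {
    	g.mu.Lock()
    	if cancel, ok := g.active[key]; ok {
    		cancel() // cancel-in-progress: true
    	}
    	ctx, cancel := context.WithCancel(context.Background())
    	g.active[key] = cancel
    	g.mu.Unlock()
    	go job(ctx)
    }

    func main() {
    	g := &runGroup{active: map[string]context.CancelFunc{}}
    	job := func(ctx context.Context) {
    		select {
    		case <-ctx.Done():
    			fmt.Println("run cancelled")
    		case <-time.After(100 * time.Millisecond):
    			fmt.Println("run finished")
    		}
    	}
    	// Distinct group keys (as in this fix) no longer cancel each other:
    	g.start("grpc-cache-main", job)
    	g.start("ci-main", job)
    	time.Sleep(200 * time.Millisecond)
    }
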
From 27ec84827c40a81663ef4df51c5e9e30bbb458c9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 19 Apr 2024 04:40:18 +0200
Subject: [PATCH 0183/2750] refactor(template): isolate and add tests (#2069)
* refactor(template): isolate and add tests
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
Signed-off-by: Dave
Co-authored-by: Dave
---
pkg/model/loader.go | 111 +++++-------------------------
pkg/model/loader_test.go | 7 +-
pkg/templates/cache.go | 103 +++++++++++++++++++++++++++
pkg/templates/cache_test.go | 73 ++++++++++++++++++++
pkg/templates/utils_suite_test.go | 13 ++++
pkg/utils/path.go | 6 ++
6 files changed, 218 insertions(+), 95 deletions(-)
create mode 100644 pkg/templates/cache.go
create mode 100644 pkg/templates/cache_test.go
create mode 100644 pkg/templates/utils_suite_test.go
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index f3182940..1b5c9aa0 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -1,18 +1,19 @@
package model
import (
- "bytes"
"context"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
- "text/template"
- "github.com/Masterminds/sprig/v3"
+ "github.com/go-skynet/LocalAI/pkg/templates"
+
"github.com/go-skynet/LocalAI/pkg/functions"
"github.com/go-skynet/LocalAI/pkg/grpc"
+ "github.com/go-skynet/LocalAI/pkg/utils"
+
process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
)
@@ -42,21 +43,6 @@ type ChatMessageTemplateData struct {
LastMessage bool
}
-// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
-// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
-type TemplateType int
-
-const (
- ChatPromptTemplate TemplateType = iota
- ChatMessageTemplate
- CompletionPromptTemplate
- EditPromptTemplate
- FunctionsPromptTemplate
-
- // The following TemplateType is **NOT** a valid value and MUST be last. It exists to make the sanity integration tests simpler!
- IntegrationTestTemplate
-)
-
// new idea: what if we declare a struct of these here, and use a loop to check?
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl
@@ -67,7 +53,7 @@ type ModelLoader struct {
grpcClients map[string]grpc.Backend
models map[string]ModelAddress
grpcProcesses map[string]*process.Process
- templates map[TemplateType]map[string]*template.Template
+ templates *templates.TemplateCache
wd *WatchDog
}
@@ -86,11 +72,10 @@ func NewModelLoader(modelPath string) *ModelLoader {
ModelPath: modelPath,
grpcClients: make(map[string]grpc.Backend),
models: make(map[string]ModelAddress),
- templates: make(map[TemplateType]map[string]*template.Template),
+ templates: templates.NewTemplateCache(modelPath),
grpcProcesses: make(map[string]*process.Process),
}
- nml.initializeTemplateMap()
return nml
}
@@ -99,7 +84,7 @@ func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
}
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
- return existsInPath(ml.ModelPath, s)
+ return utils.ExistsInPath(ml.ModelPath, s)
}
func (ml *ModelLoader) ListModels() ([]string, error) {
@@ -194,82 +179,22 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
return ""
}
-func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
+const (
+ ChatPromptTemplate templates.TemplateType = iota
+ ChatMessageTemplate
+ CompletionPromptTemplate
+ EditPromptTemplate
+ FunctionsPromptTemplate
+)
+
+func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) {
// TODO: should this check be improved?
if templateType == ChatMessageTemplate {
return "", fmt.Errorf("invalid templateType: ChatMessage")
}
- return ml.evaluateTemplate(templateType, templateName, in)
+ return ml.templates.EvaluateTemplate(templateType, templateName, in)
}
func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
- return ml.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
-}
-
-func existsInPath(path string, s string) bool {
- _, err := os.Stat(filepath.Join(path, s))
- return err == nil
-}
-
-func (ml *ModelLoader) initializeTemplateMap() {
- // This also seems somewhat clunky as we reference the Test / End of valid data value slug, but it works?
- for tt := TemplateType(0); tt < IntegrationTestTemplate; tt++ {
- ml.templates[tt] = make(map[string]*template.Template)
- }
-}
-
-func (ml *ModelLoader) evaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) {
- ml.mu.Lock()
- defer ml.mu.Unlock()
-
- m, ok := ml.templates[templateType][templateName]
- if !ok {
- // return "", fmt.Errorf("template not loaded: %s", templateName)
- loadErr := ml.loadTemplateIfExists(templateType, templateName)
- if loadErr != nil {
- return "", loadErr
- }
- m = ml.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked
- }
- if m == nil {
- return "", fmt.Errorf("failed loading a template for %s", templateName)
- }
-
- var buf bytes.Buffer
-
- if err := m.Execute(&buf, in); err != nil {
- return "", err
- }
- return buf.String(), nil
-}
-
-func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateName string) error {
- // Check if the template was already loaded
- if _, ok := ml.templates[templateType][templateName]; ok {
- return nil
- }
-
- // Check if the model path exists
- // skip any error here - we run anyway if a template does not exist
- modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
-
- dat := ""
- if ml.ExistsInModelPath(modelTemplateFile) {
- d, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
- if err != nil {
- return err
- }
- dat = string(d)
- } else {
- dat = templateName
- }
-
- // Parse the template
- tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
- if err != nil {
- return err
- }
- ml.templates[templateType][templateName] = tmpl
-
- return nil
+ return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData)
}
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index 4c3c1a88..e4207b35 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -92,10 +92,13 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac
var _ = Describe("Templates", func() {
Context("chat message", func() {
- modelLoader := NewModelLoader("")
+ var modelLoader *ModelLoader
+ BeforeEach(func() {
+ modelLoader = NewModelLoader("")
+ })
for key := range testMatch {
foo := testMatch[key]
- It("renders correctly "+key, func() {
+ It("renders correctly `"+key+"`", func() {
templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
Expect(err).ToNot(HaveOccurred())
Expect(templated).To(Equal(foo["expected"]), templated)
diff --git a/pkg/templates/cache.go b/pkg/templates/cache.go
new file mode 100644
index 00000000..9ff55605
--- /dev/null
+++ b/pkg/templates/cache.go
@@ -0,0 +1,103 @@
+package templates
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sync"
+ "text/template"
+
+ "github.com/go-skynet/LocalAI/pkg/utils"
+
+ "github.com/Masterminds/sprig/v3"
+)
+
+// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
+// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
+type TemplateType int
+
+type TemplateCache struct {
+ mu sync.Mutex
+ templatesPath string
+ templates map[TemplateType]map[string]*template.Template
+}
+
+func NewTemplateCache(templatesPath string) *TemplateCache {
+ tc := &TemplateCache{
+ templatesPath: templatesPath,
+ templates: make(map[TemplateType]map[string]*template.Template),
+ }
+ return tc
+}
+
+func (tc *TemplateCache) initializeTemplateMapKey(tt TemplateType) {
+ if _, ok := tc.templates[tt]; !ok {
+ tc.templates[tt] = make(map[string]*template.Template)
+ }
+}
+
+func (tc *TemplateCache) EvaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) {
+ tc.mu.Lock()
+ defer tc.mu.Unlock()
+
+ tc.initializeTemplateMapKey(templateType)
+ m, ok := tc.templates[templateType][templateName]
+ if !ok {
+ // return "", fmt.Errorf("template not loaded: %s", templateName)
+ loadErr := tc.loadTemplateIfExists(templateType, templateName)
+ if loadErr != nil {
+ return "", loadErr
+ }
+ m = tc.templates[templateType][templateName] // ok is not important since we check m on the next line, and we already checked
+ }
+ if m == nil {
+ return "", fmt.Errorf("failed loading a template for %s", templateName)
+ }
+
+ var buf bytes.Buffer
+
+ if err := m.Execute(&buf, in); err != nil {
+ return "", err
+ }
+ return buf.String(), nil
+}
+
+func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error {
+
+ // Check if the template was already loaded
+ if _, ok := tc.templates[templateType][templateName]; ok {
+ return nil
+ }
+
+ // Check if the model path exists
+ // skip any error here - we run anyway if a template does not exist
+ modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
+
+ dat := ""
+ file := filepath.Join(tc.templatesPath, modelTemplateFile)
+
+ // Security check
+ if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil {
+ return fmt.Errorf("template file outside path: %s", file)
+ }
+
+ if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) {
+ d, err := os.ReadFile(file)
+ if err != nil {
+ return err
+ }
+ dat = string(d)
+ } else {
+ dat = templateName
+ }
+
+ // Parse the template
+ tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
+ if err != nil {
+ return err
+ }
+ tc.templates[templateType][templateName] = tmpl
+
+ return nil
+}
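
Usage-wise the new cache is self-initializing: construct it with a templates directory and evaluate by type and name, and names that do not resolve to a .tmpl file are parsed as inline templates. A minimal sketch, assuming the LocalAI module is on the import path:

    package main

    import (
    	"fmt"

    	"github.com/go-skynet/LocalAI/pkg/templates"
    )

    func main() {
    	tc := templates.NewTemplateCache("/path/to/models") // directory holding *.tmpl files

    	// Unknown names are parsed as inline templates, so this works without files:
    	out, err := tc.EvaluateTemplate(templates.TemplateType(0), "Hello, {{.Name}}!",
    		map[string]string{"Name": "Gopher"})
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(out) // Hello, Gopher!
    }
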
diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go
new file mode 100644
index 00000000..83af02b2
--- /dev/null
+++ b/pkg/templates/cache_test.go
@@ -0,0 +1,73 @@
+package templates_test
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/go-skynet/LocalAI/pkg/templates" // Update with your module path
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("TemplateCache", func() {
+ var (
+ templateCache *templates.TemplateCache
+ tempDir string
+ )
+
+ BeforeEach(func() {
+ var err error
+ tempDir, err = os.MkdirTemp("", "templates")
+ Expect(err).NotTo(HaveOccurred())
+
+ // Writing example template files
+ err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0644)
+ Expect(err).NotTo(HaveOccurred())
+ err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0644)
+ Expect(err).NotTo(HaveOccurred())
+
+ templateCache = templates.NewTemplateCache(tempDir)
+ })
+
+ AfterEach(func() {
+ os.RemoveAll(tempDir) // Clean up
+ })
+
+ Describe("EvaluateTemplate", func() {
+ Context("when template is loaded successfully", func() {
+ It("should evaluate the template correctly", func() {
+ result, err := templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
+ Expect(err).NotTo(HaveOccurred())
+ Expect(result).To(Equal("Hello, Gopher!"))
+ })
+ })
+
+ Context("when template isn't a file", func() {
+ It("should parse from string", func() {
+ result, err := templateCache.EvaluateTemplate(1, "{{.Name}}", map[string]string{"Name": "Gopher"})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(result).To(Equal("Gopher"))
+ })
+ })
+
+ Context("when template is empty", func() {
+ It("should return an empty string", func() {
+ result, err := templateCache.EvaluateTemplate(1, "empty", nil)
+ Expect(err).NotTo(HaveOccurred())
+ Expect(result).To(Equal(""))
+ })
+ })
+ })
+
+ Describe("concurrency", func() {
+ It("should handle multiple concurrent accesses", func(done Done) {
+ go func() {
+ _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
+ }()
+ go func() {
+ _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
+ }()
+ close(done)
+ }, 0.1) // timeout in seconds
+ })
+})
diff --git a/pkg/templates/utils_suite_test.go b/pkg/templates/utils_suite_test.go
new file mode 100644
index 00000000..011ba8f6
--- /dev/null
+++ b/pkg/templates/utils_suite_test.go
@@ -0,0 +1,13 @@
+package templates_test
+
+import (
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+func TestTemplates(t *testing.T) {
+ RegisterFailHandler(Fail)
+ RunSpecs(t, "Templates test suite")
+}
diff --git a/pkg/utils/path.go b/pkg/utils/path.go
index f95b0138..9982bc1e 100644
--- a/pkg/utils/path.go
+++ b/pkg/utils/path.go
@@ -2,10 +2,16 @@ package utils
import (
"fmt"
+ "os"
"path/filepath"
"strings"
)
+func ExistsInPath(path string, s string) bool {
+ _, err := os.Stat(filepath.Join(path, s))
+ return err == nil
+}
+
func inTrustedRoot(path string, trustedRoot string) error {
for path != "/" {
path = filepath.Dir(path)
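
The VerifyPath check wired into the template loader above exists to stop a template name like ../../etc/passwd from escaping the templates directory. A simplified sketch of that kind of guard (not the actual utils implementation):

    package main

    import (
    	"fmt"
    	"path/filepath"
    	"strings"
    )

    // verifyPath sketches the traversal check: resolve the candidate under
    // basePath and reject anything that escapes it. Simplified for illustration.
    func verifyPath(path, basePath string) error {
    	c := filepath.Clean(filepath.Join(basePath, path))
    	if !strings.HasPrefix(c, filepath.Clean(basePath)+string(filepath.Separator)) {
    		return fmt.Errorf("path %q escapes %q", path, basePath)
    	}
    	return nil
    }

    func main() {
    	fmt.Println(verifyPath("example.tmpl", "/models"))     // <nil>
    	fmt.Println(verifyPath("../../etc/passwd", "/models")) // error
    }
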
From b2772509b44f2a19bb5d61a19c261b2ea02dc180 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 19 Apr 2024 18:23:44 +0200
Subject: [PATCH 0184/2750] models(llama3): add llama3 to embedded models
(#2074)
Signed-off-by: Ettore Di Giacinto
---
aio/cpu/text-to-text.yaml | 22 +++--
aio/gpu-8g/text-to-text.yaml | 22 +++--
aio/intel/text-to-text.yaml | 22 +++--
embedded/models/hermes-2-pro-mistral.yaml | 22 +++--
embedded/models/llama3-instruct.yaml | 48 +++++++++++
pkg/model/loader_test.go | 99 ++++++++++++++++++++++-
6 files changed, 203 insertions(+), 32 deletions(-)
create mode 100644 embedded/models/llama3-instruct.yaml
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 6c4ec9e6..cf18f659 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }} </tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 8d5c84f7..0407bb22 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }} </tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index a7cb5b4d..f5f93c14 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,14 +7,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }} </tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 7bfa9418..dd18ce6f 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }} </tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
new file mode 100644
index 00000000..d483d2b2
--- /dev/null
+++ b/embedded/models/llama3-instruct.yaml
@@ -0,0 +1,48 @@
+name: llama3-8b-instruct
+mmap: true
+parameters:
+ model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+template:
+ chat_message: |
+ <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+ {{ if .FunctionCall -}}
+ Function call:
+ {{ else if eq .RoleName "tool" -}}
+ Function response:
+ {{ end -}}
+ {{ if .Content -}}
+ {{.Content -}}
+ {{ else if .FunctionCall -}}
+ {{ toJson .FunctionCall -}}
+ {{ end -}}
+ <|eot_id|>
+ function: |
+ <|start_header_id|>system<|end_header_id|>
+
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+ Function call:
+ chat: |
+ <|begin_of_text|>{{.Input }}
+ <|start_header_id|>assistant<|end_header_id|>
+ completion: |
+ {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+-
+- "<|eot_id|>"
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llama3-8b-instruct",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index e4207b35..d3956b63 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
{{- end }}
<|im_end|>`
-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+ "user": {
+ "template": llama3,
+ "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "user",
+ RoleName: "user",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "assistant": {
+ "template": llama3,
+ "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_call": {
+ "template": llama3,
+ "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "",
+ FunctionCall: map[string]string{"function": "test"},
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_response": {
+ "template": llama3,
+ "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "tool",
+ RoleName: "tool",
+ Content: "Response from tool",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+}
+
+var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"template": chatML,
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>",
@@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac
}
var _ = Describe("Templates", func() {
- Context("chat message", func() {
+ Context("chat message ChatML", func() {
var modelLoader *ModelLoader
BeforeEach(func() {
modelLoader = NewModelLoader("")
})
- for key := range testMatch {
- foo := testMatch[key]
+ for key := range chatMLTestMatch {
+ foo := chatMLTestMatch[key]
+ It("renders correctly `"+key+"`", func() {
+ templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
+ Expect(err).ToNot(HaveOccurred())
+ Expect(templated).To(Equal(foo["expected"]), templated)
+ })
+ }
+ })
+ Context("chat message llama3", func() {
+ var modelLoader *ModelLoader
+ BeforeEach(func() {
+ modelLoader = NewModelLoader("")
+ })
+ for key := range llama3TestMatch {
+ foo := llama3TestMatch[key]
It("renders correctly `"+key+"`", func() {
templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
Expect(err).ToNot(HaveOccurred())
From 1e3710193065cf79640cbe10bb0c1440313da6b7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 20 Apr 2024 02:05:16 +0200
Subject: [PATCH 0185/2750] :arrow_up: Update ggerganov/llama.cpp (#2080)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index e2bfa594..b9af4612 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8
+CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From b9e770864356e82b8720be246e169ba2abedbf08 Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Fri, 19 Apr 2024 19:31:15 -0500
Subject: [PATCH 0186/2750] feat: enable polling configs for systems with
broken fsnotify (docker volumes on windows) (#2081)
* feat: enable polling configs for systems with broken fsnotify (docker volumes on windows)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: update logging to make it clear that the config file is being polled
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---------
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
core/cli/run.go | 16 +++++++++-------
core/config/application_config.go | 7 +++++++
core/startup/config_file_watcher.go | 15 +++++++++++++++
docs/content/docs/advanced/advanced-usage.md | 1 +
4 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/core/cli/run.go b/core/cli/run.go
index d729f946..02d863cd 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -14,13 +14,14 @@ import (
type RunCMD struct {
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
- ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
- ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
- AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
- UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
- ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
- LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+ BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
+ AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
+ UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
+ ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
+ LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
+ LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
@@ -65,6 +66,7 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithUploadDir(r.UploadPath),
config.WithConfigsDir(r.ConfigPath),
config.WithDynamicConfigDir(r.LocalaiConfigDir),
+ config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 77817616..d4adee18 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -23,6 +23,7 @@ type ApplicationConfig struct {
UploadDir string
ConfigsDir string
DynamicConfigsDir string
+ DynamicConfigsDirPollInterval time.Duration
CORS bool
PreloadJSONModels string
PreloadModelsFromPath string
@@ -271,6 +272,12 @@ func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
}
}
+func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption {
+ return func(o *ApplicationConfig) {
+ o.DynamicConfigsDirPollInterval = interval
+ }
+}
+
func WithApiKeys(apiKeys []string) AppOption {
return func(o *ApplicationConfig) {
o.ApiKeys = apiKeys
diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go
index 5d213df5..5f6834d4 100644
--- a/core/startup/config_file_watcher.go
+++ b/core/startup/config_file_watcher.go
@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path"
+ "time"
"github.com/fsnotify/fsnotify"
"github.com/go-skynet/LocalAI/core/config"
@@ -66,6 +67,20 @@ func (c *configFileHandler) Watch() error {
log.Fatal().Err(err).Str("configdir", c.configDir).Msg("unable to create a watcher for configuration directory")
}
+ if c.appConfig.DynamicConfigsDirPollInterval > 0 {
+ log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
+ ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
+ go func() {
+ for {
+ <-ticker.C
+ for file, handler := range c.handlers {
+ log.Debug().Str("file", file).Msg("polling config file")
+ c.callHandler(file, handler)
+ }
+ }
+ }()
+ }
+
// Start listening for events.
go func() {
for {
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index 4bd16030..cbf7dba3 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -402,6 +402,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
+| --localai-config-dir-poll-interval | | Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to a time duration to poll the LocalAI Config Dir (example: 1m) | $LOCALAI_CONFIG_DIR_POLL_INTERVAL |
| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE |
#### Models Flags
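As a rough sketch of the fallback this patch adds, the pattern below re-fires a set of per-file handlers on a fixed time.Ticker instead of waiting for filesystem events. The names here (pollHandlers, the handler signature) are illustrative, not LocalAI's actual API; unlike the goroutine in the diff, this version also returns a stop function so the ticker can be cleaned up:

```go
package main

import (
	"fmt"
	"time"
)

// Minimal sketch of a ticker-based polling fallback for systems where
// fsnotify events are unreliable (e.g. Docker volumes on Windows).
func pollHandlers(interval time.Duration, handlers map[string]func(string)) func() {
	ticker := time.NewTicker(interval)
	done := make(chan struct{})
	go func() {
		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				// Invoke every registered handler on each tick.
				for file, handler := range handlers {
					handler(file)
				}
			}
		}
	}()
	// Stop function so the caller can shut the poller down cleanly.
	return func() { ticker.Stop(); close(done) }
}

func main() {
	stop := pollHandlers(500*time.Millisecond, map[string]func(string){
		"api_keys.json": func(f string) { fmt.Println("polling config file:", f) },
	})
	time.Sleep(2 * time.Second)
	stop()
}
```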
From 1038f7469c72e44e19cabaa0af474cb75d2b6121 Mon Sep 17 00:00:00 2001
From: Dave
Date: Sat, 20 Apr 2024 04:42:02 -0400
Subject: [PATCH 0187/2750] fix: action-tmate: use connect-timeout-seconds and
limit-access-to-actor (#2083)
fix for action-tmate: connect-timeout-seconds and limit-access-to-actor
Signed-off-by: Dave Lee
---
.github/workflows/test.yml | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 46c4e065..9eb4f084 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -121,8 +121,10 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
+ uses: mxschmitt/action-tmate@v3.18
+ with:
+ connect-timeout-seconds: 180
+ limit-access-to-actor: true
tests-aio-container:
runs-on: ubuntu-latest
@@ -173,8 +175,10 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
+ uses: mxschmitt/action-tmate@v3.18
+ with:
+ connect-timeout-seconds: 180
+ limit-access-to-actor: true
tests-apple:
runs-on: macOS-14
@@ -207,5 +211,7 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3
- timeout-minutes: 5
\ No newline at end of file
+ uses: mxschmitt/action-tmate@v3.18
+ with:
+ connect-timeout-seconds: 180
+ limit-access-to-actor: true
\ No newline at end of file
From 8d30b39811fa1a00e9b8443a0b9f1db6e5609b5a Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Sat, 20 Apr 2024 03:43:37 -0500
Subject: [PATCH 0188/2750] feat: fiber logs with zerolog and add trace level
(#2082)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
core/cli/cli.go | 2 +-
core/cli/run.go | 5 ++---
core/config/application_config.go | 19 ++++++------------
core/http/api.go | 33 +++++++++++++++++++++----------
core/http/api_test.go | 1 -
core/startup/startup.go | 6 ------
go.mod | 3 ++-
go.sum | 4 ++++
main.go | 11 +++++++----
9 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/core/cli/cli.go b/core/cli/cli.go
index 5e757f64..2f2dcd8b 100644
--- a/core/cli/cli.go
+++ b/core/cli/cli.go
@@ -4,7 +4,7 @@ import "embed"
type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
- LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
+ LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
BackendAssets embed.FS `kong:"-"`
diff --git a/core/cli/run.go b/core/cli/run.go
index 02d863cd..16e65725 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -8,6 +8,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http"
"github.com/go-skynet/LocalAI/core/startup"
+ "github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -60,7 +61,7 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
config.WithModelPath(r.ModelsPath),
config.WithContextSize(r.ContextSize),
- config.WithDebug(*ctx.LogLevel == "debug"),
+ config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
config.WithImageDir(r.ImagePath),
config.WithAudioDir(r.AudioPath),
config.WithUploadDir(r.UploadPath),
@@ -70,7 +71,6 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
- config.WithDisableMessage(false),
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithThreads(r.Threads),
@@ -131,7 +131,6 @@ func (r *RunCMD) Run(ctx *Context) error {
}
cl, ml, options, err := startup.Startup(opts...)
-
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
diff --git a/core/config/application_config.go b/core/config/application_config.go
index d4adee18..2d733c1e 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -17,7 +17,7 @@ type ApplicationConfig struct {
UploadLimitMB, Threads, ContextSize int
DisableWelcomePage bool
F16 bool
- Debug, DisableMessage bool
+ Debug bool
ImageDir string
AudioDir string
UploadDir string
@@ -57,12 +57,11 @@ type AppOption func(*ApplicationConfig)
func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
opt := &ApplicationConfig{
- Context: context.Background(),
- UploadLimitMB: 15,
- Threads: 1,
- ContextSize: 512,
- Debug: true,
- DisableMessage: true,
+ Context: context.Background(),
+ UploadLimitMB: 15,
+ Threads: 1,
+ ContextSize: 512,
+ Debug: true,
}
for _, oo := range o {
oo(opt)
@@ -236,12 +235,6 @@ func WithDebug(debug bool) AppOption {
}
}
-func WithDisableMessage(disableMessage bool) AppOption {
- return func(o *ApplicationConfig) {
- o.DisableMessage = disableMessage
- }
-}
-
func WithAudioDir(audioDir string) AppOption {
return func(o *ApplicationConfig) {
o.AudioDir = audioDir
diff --git a/core/http/api.go b/core/http/api.go
index af38512a..fe8f711c 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -7,7 +7,6 @@ import (
"strings"
"github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/gofiber/swagger" // swagger handler
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
@@ -19,10 +18,13 @@ import (
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
- "github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
+ "github.com/gofiber/swagger" // swagger handler
+
+ "github.com/rs/zerolog/log"
)
func readAuthHeader(c *fiber.Ctx) string {
@@ -59,9 +61,11 @@ func readAuthHeader(c *fiber.Ctx) string {
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
- Views: renderEngine(),
- BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
- DisableStartupMessage: appConfig.DisableMessage,
+ Views: renderEngine(),
+ BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+ // We disable the Fiber startup message as it does not conform to structured logging.
+ // We register a startup log line with connection information in the OnListen hook to keep things user-friendly, though
+ DisableStartupMessage: true,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -82,11 +86,20 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
},
})
- if appConfig.Debug {
- app.Use(logger.New(logger.Config{
- Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
- }))
- }
+ app.Hooks().OnListen(func(listenData fiber.ListenData) error {
+ scheme := "http"
+ if listenData.TLS {
+ scheme = "https"
+ }
+ log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! Please connect to the endpoint for API documentation.")
+ return nil
+ })
+
+ // Have Fiber use zerolog like the rest of the application rather than its built-in logger
+ logger := log.Logger
+ app.Use(fiberzerolog.New(fiberzerolog.Config{
+ Logger: &logger,
+ }))
// Default middleware config
diff --git a/core/http/api_test.go b/core/http/api_test.go
index 1553ed21..35e0a8bf 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -211,7 +211,6 @@ var _ = Describe("API test", func() {
commonOpts := []config.AppOption{
config.WithDebug(true),
- config.WithDisableMessage(true),
}
Context("API with ephemeral models", func() {
diff --git a/core/startup/startup.go b/core/startup/startup.go
index af92f0e1..97882a22 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -10,18 +10,12 @@ import (
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
- "github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
options := config.NewApplicationConfig(opts...)
- zerolog.SetGlobalLevel(zerolog.InfoLevel)
- if options.Debug {
- zerolog.SetGlobalLevel(zerolog.DebugLevel)
- }
-
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
diff --git a/go.mod b/go.mod
index 99af8ce7..0bf9aa02 100644
--- a/go.mod
+++ b/go.mod
@@ -29,7 +29,7 @@ require (
github.com/otiai10/openaigo v1.6.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
- github.com/rs/zerolog v1.31.0
+ github.com/rs/zerolog v1.32.0
github.com/russross/blackfriday v1.6.0
github.com/sashabaranov/go-openai v1.20.4
github.com/schollz/progressbar/v3 v3.13.1
@@ -145,6 +145,7 @@ require (
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+ github.com/gofiber/contrib/fiberzerolog v1.0.0
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
diff --git a/go.sum b/go.sum
index a421e79c..55fdaf06 100644
--- a/go.sum
+++ b/go.sum
@@ -100,6 +100,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gofiber/contrib/fiberzerolog v1.0.0 h1:IB8q+NO2zPNS4VHKde1x5DqtMJ5vGrvDCydnAjlFw3E=
+github.com/gofiber/contrib/fiberzerolog v1.0.0/go.mod h1:SOi+Wo7RQlO/HV0jsYTu6uFQy+8ZPTzCZW4fDEKD3l8=
github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
@@ -281,6 +283,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0=
+github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
diff --git a/main.go b/main.go
index 8b5696d1..0b40175e 100644
--- a/main.go
+++ b/main.go
@@ -91,17 +91,20 @@ Version: ${version}
switch *cli.CLI.LogLevel {
case "error":
- log.Info().Msg("Setting logging to error")
zerolog.SetGlobalLevel(zerolog.ErrorLevel)
+ log.Info().Msg("Setting logging to error")
case "warn":
- log.Info().Msg("Setting logging to warn")
zerolog.SetGlobalLevel(zerolog.WarnLevel)
+ log.Info().Msg("Setting logging to warn")
case "info":
- log.Info().Msg("Setting logging to info")
zerolog.SetGlobalLevel(zerolog.InfoLevel)
+ log.Info().Msg("Setting logging to info")
case "debug":
- log.Info().Msg("Setting logging to debug")
zerolog.SetGlobalLevel(zerolog.DebugLevel)
+ log.Debug().Msg("Setting logging to debug")
+ case "trace":
+ zerolog.SetGlobalLevel(zerolog.TraceLevel)
+ log.Trace().Msg("Setting logging to trace")
}
// Populate the application with the embedded backend assets
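A minimal, self-contained sketch of the wiring this patch converges on: one zerolog global level (set from the CLI flag) drives both application logs and Fiber's per-request logs via fiberzerolog. The endpoint, port, and handler here are placeholders:

```go
package main

import (
	"github.com/gofiber/contrib/fiberzerolog"
	"github.com/gofiber/fiber/v2"
	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
)

func main() {
	// Equivalent of --log-level=trace: a single global level now governs
	// all logging, application and HTTP alike.
	zerolog.SetGlobalLevel(zerolog.TraceLevel)

	// Startup banner disabled; structured request logging used instead.
	app := fiber.New(fiber.Config{DisableStartupMessage: true})
	logger := log.Logger
	app.Use(fiberzerolog.New(fiberzerolog.Config{
		Logger: &logger,
	}))

	app.Get("/healthz", func(c *fiber.Ctx) error {
		return c.SendString("ok")
	})
	log.Fatal().Err(app.Listen(":8080")).Msg("server exited")
}
```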
From b319ed58b026f91f48599c62c85eec5fbbc8764b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 20 Apr 2024 15:22:54 +0200
Subject: [PATCH 0189/2750] models(gallery): add gallery (#2078)
Signed-off-by: Ettore Di Giacinto
---
core/cli/run.go | 2 +-
gallery/bert-embeddings.yaml | 15 ++
gallery/index.yaml | 503 +++++++++++++++++++++++++++++++++++
gallery/stablediffusion.yaml | 54 ++++
gallery/tinydream.yaml | 42 +++
gallery/virtual.yaml | 6 +
gallery/whisper-base.yaml | 18 ++
main.go | 1 +
8 files changed, 640 insertions(+), 1 deletion(-)
create mode 100644 gallery/bert-embeddings.yaml
create mode 100644 gallery/index.yaml
create mode 100644 gallery/stablediffusion.yaml
create mode 100644 gallery/tinydream.yaml
create mode 100644 gallery/virtual.yaml
create mode 100644 gallery/whisper-base.yaml
diff --git a/core/cli/run.go b/core/cli/run.go
index 16e65725..42185a28 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -26,7 +26,7 @@ type RunCMD struct {
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
- Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
+ Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml
new file mode 100644
index 00000000..0798bf54
--- /dev/null
+++ b/gallery/bert-embeddings.yaml
@@ -0,0 +1,15 @@
+name: "bert-embeddings"
+license: "Apache 2.0"
+urls:
+- https://huggingface.co/skeskinen/ggml
+description: |
+ Bert model that can be used for embeddings
+config_file: |
+ parameters:
+ model: bert-MiniLM-L6-v2q4_0.bin
+ backend: bert-embeddings
+ embeddings: true
+files:
+- filename: "bert-MiniLM-L6-v2q4_0.bin"
+ sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
+ uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
\ No newline at end of file
diff --git a/gallery/index.yaml b/gallery/index.yaml
new file mode 100644
index 00000000..6b882768
--- /dev/null
+++ b/gallery/index.yaml
@@ -0,0 +1,503 @@
+## Whisper
+- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
+ name: "whisper-1"
+ license: other
+## Bert embeddings
+- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
+ name: "bert-embeddings"
+ license: other
+- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
+ name: "text-embedding-ada-002"
+ license: other
+## Stable Diffusion
+- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
+ name: stablediffusion
+ license: other
+## Tiny Dream
+- url: github:mudler/LocalAI/gallery/tinydream.yaml@master
+ name: tinydream
+ license: other
+## Piper TTS
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-kathleen-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-kathleen-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-ca-upc_ona-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-ca-upc_ona-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-ca-upc_pau-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-ca-upc_pau-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-da-nst_talesyntese-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-da-nst_talesyntese-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-eva_k-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-eva_k-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-karlsson-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-karlsson-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-kerstin-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-kerstin-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-pavoque-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-pavoque-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-ramona-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-ramona-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-de-thorsten-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-de-thorsten-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-el-gr-rapunzelina-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-el-gr-rapunzelina-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-gb-alan-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-gb-alan-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-gb-southern_english_female-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-gb-southern_english_female-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-amy-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-amy-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-danny-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-danny-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-kathleen-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-kathleen-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-lessac-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-lessac-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-lessac-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-lessac-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-libritts-high
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-libritts-high.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-ryan-high
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-ryan-high.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-ryan-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-ryan-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us-ryan-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us-ryan-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-en-us_lessac
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-en-us_lessac.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-es-carlfm-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-es-carlfm-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-es-mls_10246-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-es-mls_10246-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-es-mls_9972-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-es-mls_9972-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-fi-harri-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-fi-harri-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-fr-gilles-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-fr-gilles-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-fr-mls_1840-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-fr-mls_1840-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-fr-siwis-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-fr-siwis-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-fr-siwis-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-fr-siwis-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-is-bui-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-is-bui-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-is-salka-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-is-salka-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-is-steinn-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-is-steinn-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-is-ugla-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-is-ugla-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-it-riccardo_fasol-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-it-riccardo_fasol-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-kk-iseke-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-kk-iseke-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-kk-issai-high
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-kk-issai-high.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-kk-raya-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-kk-raya-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-ne-google-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-ne-google-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-ne-google-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-ne-google-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-nl-mls_5809-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-nl-mls_5809-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-nl-mls_7432-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-nl-mls_7432-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-nl-nathalie-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-nl-nathalie-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-nl-rdh-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-nl-rdh-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-nl-rdh-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-nl-rdh-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-no-talesyntese-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-no-talesyntese-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-pl-mls_6892-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-pl-mls_6892-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-pt-br-edresson-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-pt-br-edresson-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-ru-irinia-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-ru-irinia-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-sv-se-nst-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-sv-se-nst-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-uk-lada-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-uk-lada-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-vi-25hours-single-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-vi-25hours-single-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-vi-vivos-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-vi-vivos-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-zh-cn-huayan-x-low
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-zh-cn-huayan-x-low.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz
+
+- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+ name: voice-zh_CN-huayan-medium
+ license: other
+ urls:
+ - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ files:
+ - filename: voice-zh_CN-huayan-medium.tar.gz
+ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
\ No newline at end of file
diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml
new file mode 100644
index 00000000..c8a0eb8b
--- /dev/null
+++ b/gallery/stablediffusion.yaml
@@ -0,0 +1,54 @@
+name: "stablediffusion-cpp"
+license: "BSD-3"
+urls:
+- https://github.com/EdVince/Stable-Diffusion-NCNN
+- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
+
+description: |
+ Stable Diffusion in NCNN with C++, supporting txt2img and img2img
+config_file: |
+ name: stablediffusion-cpp
+ backend: stablediffusion
+ parameters:
+ model: stablediffusion_assets
+
+files:
+- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
+ sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
+ sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
+ sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
+- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
+ sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
+- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
+ sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
+ sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
+ sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
+- filename: "stablediffusion_assets/log_sigmas.bin"
+ sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
+- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
+ sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
+ sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
+ sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
+- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
+ sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
+ uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
+- filename: "stablediffusion_assets/vocab.txt"
+ sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
+ uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
\ No newline at end of file
diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml
new file mode 100644
index 00000000..415762de
--- /dev/null
+++ b/gallery/tinydream.yaml
@@ -0,0 +1,42 @@
+name: "tinydream"
+license: "BSD-3"
+urls:
+ - https://github.com/symisc/tiny-dream
+ - https://github.com/symisc/tiny-dream/blob/main/LICENSE
+
+description: |
+ An embedded, header-only Stable Diffusion C++ implementation
+config_file: |
+ name: tinydream
+ backend: tinydream
+ parameters:
+ model: tinydream_assets
+
+files:
+ - filename: "tinydream_assets/AutoencoderKL-fp16.bin"
+ sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin"
+ - filename: "tinydream_assets/AutoencoderKL-fp16.param"
+ sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param"
+ - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin"
+ sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin"
+ - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param"
+ sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param"
+ - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin"
+ sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin"
+ - filename: "tinydream_assets/RealESRGAN_x4plus_anime.param"
+ sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param"
+ - filename: "tinydream_assets/UNetModel-fp16.bin"
+ sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin"
+ - filename: "tinydream_assets/UNetModel-fp16.param"
+ sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param"
+ - filename: "tinydream_assets/vocab.txt"
+ sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
+ uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt"
\ No newline at end of file
diff --git a/gallery/virtual.yaml b/gallery/virtual.yaml
new file mode 100644
index 00000000..054c3257
--- /dev/null
+++ b/gallery/virtual.yaml
@@ -0,0 +1,6 @@
+name: "virtual"
+
+description: |
+ A Base model definition
+
+license: "N/A"
\ No newline at end of file
diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml
new file mode 100644
index 00000000..574dbb13
--- /dev/null
+++ b/gallery/whisper-base.yaml
@@ -0,0 +1,18 @@
+name: "whisper-base"
+license: "MIT"
+urls:
+- https://github.com/ggerganov/whisper.cpp
+- https://huggingface.co/ggerganov/whisper.cpp
+
+description: |
+ Port of OpenAI's Whisper model in C/C++
+
+config_file: |
+ backend: whisper
+ parameters:
+ model: ggml-whisper-base.bin
+
+files:
+- filename: "ggml-whisper-base.bin"
+ sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+ uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
diff --git a/main.go b/main.go
index 0b40175e..9976906b 100644
--- a/main.go
+++ b/main.go
@@ -72,6 +72,7 @@ Version: ${version}
kong.Vars{
"basepath": kong.ExpandPath("."),
"remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
+ "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`,
"version": internal.PrintableVersion(),
},
)
From 03adc1f60d97ae7cd5d3b1e58c5511e36c5f4eba Mon Sep 17 00:00:00 2001
From: Taikono-Himazin
Date: Sat, 20 Apr 2024 23:37:02 +0900
Subject: [PATCH 0190/2750] Add tensor_parallel_size setting to vllm setting
items (#2085)
Signed-off-by: Taikono-Himazin
---
backend/backend.proto | 1 +
backend/python/vllm/backend_vllm.py | 2 ++
core/backend/options.go | 1 +
core/config/backend_config.go | 1 +
4 files changed, 5 insertions(+)
diff --git a/backend/backend.proto b/backend/backend.proto
index 62e1a1a6..ec01e4a7 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -177,6 +177,7 @@ message ModelOptions {
bool EnforceEager = 52;
int32 SwapSpace = 53;
int32 MaxModelLen = 54;
+ int32 TensorParallelSize = 55;
string MMProj = 41;
diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py
index ff0f0b26..2d8b55db 100644
--- a/backend/python/vllm/backend_vllm.py
+++ b/backend/python/vllm/backend_vllm.py
@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
engine_args.trust_remote_code = request.TrustRemoteCode
if request.EnforceEager:
engine_args.enforce_eager = request.EnforceEager
+ if request.TensorParallelSize:
+ engine_args.tensor_parallel_size = request.TensorParallelSize
if request.SwapSpace != 0:
engine_args.swap_space = request.SwapSpace
if request.MaxModelLen != 0:
diff --git a/core/backend/options.go b/core/backend/options.go
index 5b303b05..60cb01ff 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
EnforceEager: c.EnforceEager,
SwapSpace: int32(c.SwapSpace),
MaxModelLen: int32(c.MaxModelLen),
+ TensorParallelSize: int32(c.TensorParallelSize),
MMProj: c.MMProj,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 1161cf9f..a439ee63 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -140,6 +140,7 @@ type LLMConfig struct {
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
+ TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
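For illustration, a small sketch of how the new knob is expected to surface to users: a model YAML sets tensor_parallel_size, which unmarshals into the vLLM section of the backend config and is then forwarded over gRPC. The struct is a trimmed, hypothetical slice of LLMConfig:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

// Hypothetical, trimmed slice of LocalAI's LLMConfig showing only the
// vLLM-related fields relevant to this patch.
type llmConfig struct {
	Backend            string `yaml:"backend"`
	TensorParallelSize int    `yaml:"tensor_parallel_size"` // vLLM: GPUs per model replica
}

func main() {
	modelYAML := []byte("backend: vllm\ntensor_parallel_size: 2\n")
	var cfg llmConfig
	if err := yaml.Unmarshal(modelYAML, &cfg); err != nil {
		panic(err)
	}
	// A value > 0 would be passed through to vLLM's
	// engine_args.tensor_parallel_size, as the Python diff above shows.
	fmt.Printf("backend=%s tensor_parallel_size=%d\n", cfg.Backend, cfg.TensorParallelSize)
}
```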
From afa1bca1e367c2a52fd584d95d5a98904cadb353 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 20 Apr 2024 20:20:10 +0200
Subject: [PATCH 0191/2750] fix(llama.cpp): set -1 as default for max tokens
(#2087)
Signed-off-by: Ettore Di Giacinto
---
core/config/backend_config.go | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index a439ee63..6ca24afa 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -205,15 +205,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9
- defaultMaxTokens := 2048
defaultMirostat := 2
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
defaultTypicalP := 1.0
defaultTFZ := 1.0
+ defaultInfinity := -1
// Try to offload all GPU layers (if GPU is found)
- defaultNGPULayers := 99999999
+ defaultHigh := 99999999
trueV := true
falseV := false
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
}
if cfg.Maxtokens == nil {
- cfg.Maxtokens = &defaultMaxTokens
+ cfg.Maxtokens = &defaultInfinity
}
if cfg.Mirostat == nil {
@@ -269,7 +269,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.MirostatTAU = &defaultMirostatTAU
}
if cfg.NGPULayers == nil {
- cfg.NGPULayers = &defaultNGPULayers
+ cfg.NGPULayers = &defaultHigh
}
if cfg.LowVRAM == nil {
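The change relies on the pointer-default pattern already used throughout SetDefaults: optional fields are pointers, so an unset value (nil) is distinguishable from an explicit zero, and -1 can serve as the "no limit" sentinel for max tokens. A stripped-down sketch:

```go
package main

import "fmt"

// Sketch of the pointer-default pattern: nil means "unset", so defaults
// never clobber an explicitly configured value, even an explicit zero.
type sketchConfig struct {
	Maxtokens *int
}

func (c *sketchConfig) setDefaults() {
	defaultInfinity := -1 // -1 means "generate until the model stops"
	if c.Maxtokens == nil {
		c.Maxtokens = &defaultInfinity
	}
}

func main() {
	var unset, explicit sketchConfig
	zero := 0
	explicit.Maxtokens = &zero // user explicitly set 0; must be preserved

	unset.setDefaults()
	explicit.setDefaults()
	fmt.Println(*unset.Maxtokens, *explicit.Maxtokens) // -1 0
}
```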
From 284ad026b1ce3d2751a51e48e5eea8ea6458e191 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 21 Apr 2024 01:19:57 +0200
Subject: [PATCH 0192/2750] refactor(routes): split routes registration (#2077)
Signed-off-by: Ettore Di Giacinto
---
Makefile | 2 +-
core/http/api.go | 135 ++-----------------------
core/http/endpoints/localai/welcome.go | 28 +++++
core/http/render.go | 37 -------
core/http/routes/elevenlabs.go | 19 ++++
core/http/routes/localai.go | 64 ++++++++++++
core/http/routes/openai.go | 86 ++++++++++++++++
core/http/routes/welcome.go | 23 +++++
8 files changed, 227 insertions(+), 167 deletions(-)
create mode 100644 core/http/endpoints/localai/welcome.go
create mode 100644 core/http/routes/elevenlabs.go
create mode 100644 core/http/routes/localai.go
create mode 100644 core/http/routes/openai.go
create mode 100644 core/http/routes/welcome.go
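Before the diff, a brief sketch of the shape this refactor moves toward: each new routes file exports a registration function against the shared *fiber.App, so App() only composes them instead of wiring every endpoint inline. The function names and handler bodies below are placeholders, not the actual signatures:

```go
package main

import "github.com/gofiber/fiber/v2"

// Hypothetical registration funcs in the spirit of core/http/routes:
// each domain owns its own endpoint wiring.
func registerOpenAIRoutes(app *fiber.App) {
	app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {
		return c.JSON(fiber.Map{"object": "chat.completion"})
	})
}

func registerLocalAIRoutes(app *fiber.App) {
	app.Get("/version", func(c *fiber.Ctx) error {
		return c.JSON(fiber.Map{"version": "dev"})
	})
}

func main() {
	app := fiber.New()
	// App() reduces to composing per-domain registrations.
	registerOpenAIRoutes(app)
	registerLocalAIRoutes(app)
	if err := app.Listen(":8080"); err != nil {
		panic(err)
	}
}
```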
diff --git a/Makefile b/Makefile
index b9af4612..ea81b535 100644
--- a/Makefile
+++ b/Makefile
@@ -714,4 +714,4 @@ docker-image-intel-xpu:
.PHONY: swagger
swagger:
- swag init -g core/http/api.go --output swagger
+ swag init -g core/http/app.go --output swagger
diff --git a/core/http/api.go b/core/http/api.go
index fe8f711c..1061627f 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -8,22 +8,21 @@ import (
"github.com/go-skynet/LocalAI/pkg/utils"
- "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+ "github.com/go-skynet/LocalAI/core/http/routes"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/services"
- "github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/recover"
- "github.com/gofiber/swagger" // swagger handler
+ // swagger handler
"github.com/rs/zerolog/log"
)
@@ -175,16 +174,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Use(c)
}
- // LocalAI API endpoints
- galleryService := services.NewGalleryService(appConfig.ModelPath)
- galleryService.Start(appConfig.Context, cl)
-
- app.Get("/version", auth, func(c *fiber.Ctx) error {
- return c.JSON(struct {
- Version string `json:"version"`
- }{Version: internal.PrintableVersion()})
- })
-
// Make sure directories exists
os.MkdirAll(appConfig.ImageDir, 0755)
os.MkdirAll(appConfig.AudioDir, 0755)
@@ -197,122 +186,10 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
- app.Get("/swagger/*", swagger.HandlerDefault) // default
-
- welcomeRoute(
- app,
- cl,
- ml,
- appConfig,
- auth,
- )
-
- modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
- app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
- app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
- app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
- app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
- app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
- app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
- app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
-
- app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
-
- // Elevenlabs
- app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
-
- // Stores
- sl := model.NewModelLoader("")
- app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
- app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
- app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
- app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
-
- // openAI compatible API endpoint
-
- // chat
- app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
- app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
-
- // edit
- app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
- app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
-
- // assistant
- app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
- app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
- app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
-
- // files
- app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
- app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
- app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
- app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
- app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
- app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
- app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
- app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
- app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
- app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
-
- // completion
- app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
- app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
- app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
-
- // embeddings
- app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
- app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
- app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
-
- // audio
- app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
- app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
-
- // images
- app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
-
- if appConfig.ImageDir != "" {
- app.Static("/generated-images", appConfig.ImageDir)
- }
-
- if appConfig.AudioDir != "" {
- app.Static("/generated-audio", appConfig.AudioDir)
- }
-
- ok := func(c *fiber.Ctx) error {
- return c.SendStatus(200)
- }
-
- // Kubernetes health checks
- app.Get("/healthz", ok)
- app.Get("/readyz", ok)
-
- // Experimental Backend Statistics Module
- backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
- app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
- app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
-
- // models
- app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
- app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
-
- app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
+ routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
+ routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth)
+ routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
+ routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
// Define a custom 404 handler
// Note: keep this at the bottom!
diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go
new file mode 100644
index 00000000..fd3e6230
--- /dev/null
+++ b/core/http/endpoints/localai/welcome.go
@@ -0,0 +1,28 @@
+package localai
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/internal"
+ "github.com/gofiber/fiber/v2"
+)
+
+func WelcomeEndpoint(appConfig *config.ApplicationConfig,
+ models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error {
+ return func(c *fiber.Ctx) error {
+ summary := fiber.Map{
+ "Title": "LocalAI API - " + internal.PrintableVersion(),
+ "Version": internal.PrintableVersion(),
+ "Models": models,
+ "ModelsConfig": backendConfigs,
+ "ApplicationConfig": appConfig,
+ }
+
+ if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
+ // The client expects a JSON response
+ return c.Status(fiber.StatusOK).JSON(summary)
+ } else {
+ // Render index
+ return c.Render("views/index", summary)
+ }
+ }
+}
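A note on the negotiation branch in the new `WelcomeEndpoint`: it serves the same summary two ways, JSON for API clients and a rendered template for browsers. Below is a minimal runnable sketch of that check, assuming only the fiber v2 API; the handler body and payload are illustrative, not part of the patch.

```go
package main

import "github.com/gofiber/fiber/v2"

// wantsJSON reproduces the check used above: treat the request as an API
// call when the client sent JSON or does not accept HTML.
func wantsJSON(c *fiber.Ctx) bool {
	ct := string(c.Context().Request.Header.ContentType())
	return ct == "application/json" || len(c.Accepts("html")) == 0
}

func main() {
	app := fiber.New()
	app.Get("/", func(c *fiber.Ctx) error {
		if wantsJSON(c) {
			return c.JSON(fiber.Map{"Title": "sketch"})
		}
		return c.SendString("<html>sketch</html>")
	})
	_ = app.Listen(":8080")
}
```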
diff --git a/core/http/render.go b/core/http/render.go
index c5045868..8f1b36c6 100644
--- a/core/http/render.go
+++ b/core/http/render.go
@@ -7,10 +7,7 @@ import (
"net/http"
"github.com/Masterminds/sprig/v3"
- "github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
- "github.com/go-skynet/LocalAI/internal"
- "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
fiberhtml "github.com/gofiber/template/html/v2"
"github.com/russross/blackfriday"
@@ -33,40 +30,6 @@ func notFoundHandler(c *fiber.Ctx) error {
return nil
}
-func welcomeRoute(
- app *fiber.App,
- cl *config.BackendConfigLoader,
- ml *model.ModelLoader,
- appConfig *config.ApplicationConfig,
- auth func(*fiber.Ctx) error,
-) {
- if appConfig.DisableWelcomePage {
- return
- }
-
- models, _ := ml.ListModels()
- backendConfigs := cl.GetAllBackendConfigs()
-
- app.Get("/", auth, func(c *fiber.Ctx) error {
- summary := fiber.Map{
- "Title": "LocalAI API - " + internal.PrintableVersion(),
- "Version": internal.PrintableVersion(),
- "Models": models,
- "ModelsConfig": backendConfigs,
- "ApplicationConfig": appConfig,
- }
-
- if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
- // The client expects a JSON response
- return c.Status(fiber.StatusOK).JSON(summary)
- } else {
- // Render index
- return c.Render("views/index", summary)
- }
- })
-
-}
-
func renderEngine() *fiberhtml.Engine {
engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html")
engine.AddFuncMap(sprig.FuncMap())
diff --git a/core/http/routes/elevenlabs.go b/core/http/routes/elevenlabs.go
new file mode 100644
index 00000000..e24a19a8
--- /dev/null
+++ b/core/http/routes/elevenlabs.go
@@ -0,0 +1,19 @@
+package routes
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+)
+
+func RegisterElevenLabsRoutes(app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ auth func(*fiber.Ctx) error) {
+
+ // Elevenlabs
+ app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
+
+}
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
new file mode 100644
index 00000000..2651a53e
--- /dev/null
+++ b/core/http/routes/localai.go
@@ -0,0 +1,64 @@
+package routes
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http/endpoints/localai"
+ "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/internal"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+ "github.com/gofiber/swagger"
+)
+
+func RegisterLocalAIRoutes(app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ auth func(*fiber.Ctx) error) {
+
+ app.Get("/swagger/*", swagger.HandlerDefault) // default
+
+ // LocalAI API endpoints
+ galleryService := services.NewGalleryService(appConfig.ModelPath)
+ galleryService.Start(appConfig.Context, cl)
+
+ modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
+ app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
+ app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
+ app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
+ app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
+ app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
+ app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
+ app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
+
+ app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
+
+ // Stores
+ sl := model.NewModelLoader("")
+ app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
+ app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
+ app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
+ app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
+
+ // Kubernetes health checks
+ ok := func(c *fiber.Ctx) error {
+ return c.SendStatus(200)
+ }
+
+ app.Get("/healthz", ok)
+ app.Get("/readyz", ok)
+
+ app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
+
+ // Experimental Backend Statistics Module
+ backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
+ app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
+ app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
+
+ app.Get("/version", auth, func(c *fiber.Ctx) error {
+ return c.JSON(struct {
+ Version string `json:"version"`
+ }{Version: internal.PrintableVersion()})
+ })
+
+}
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
new file mode 100644
index 00000000..c51ccdcb
--- /dev/null
+++ b/core/http/routes/openai.go
@@ -0,0 +1,86 @@
+package routes
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http/endpoints/localai"
+ "github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+)
+
+func RegisterOpenAIRoutes(app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ auth func(*fiber.Ctx) error) {
+ // openAI compatible API endpoint
+
+ // chat
+ app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+ app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
+
+ // edit
+ app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+ app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
+
+ // assistant
+ app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+ app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+ app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+
+ // files
+ app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+ app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+ app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+ app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
+ app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+ app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
+ app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+ app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
+
+ // completion
+ app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+ app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+ app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
+
+ // embeddings
+ app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+ app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+ app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
+
+ // audio
+ app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
+ app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
+
+ // images
+ app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
+
+ if appConfig.ImageDir != "" {
+ app.Static("/generated-images", appConfig.ImageDir)
+ }
+
+ if appConfig.AudioDir != "" {
+ app.Static("/generated-audio", appConfig.AudioDir)
+ }
+
+ // models
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
+ app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
+}
diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go
new file mode 100644
index 00000000..29b9e586
--- /dev/null
+++ b/core/http/routes/welcome.go
@@ -0,0 +1,23 @@
+package routes
+
+import (
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http/endpoints/localai"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+)
+
+func RegisterPagesRoutes(app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ auth func(*fiber.Ctx) error) {
+
+ models, _ := ml.ListModels()
+ backendConfigs := cl.GetAllBackendConfigs()
+
+ if !appConfig.DisableWelcomePage {
+ app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs))
+ }
+
+}
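The shape of the patch is worth spelling out: `App()` no longer wires endpoints itself, and each feature area exposes a `Register*Routes` function taking the fiber app plus shared dependencies. Below is a minimal sketch of the same pattern, assuming only the fiber v2 API; the health routes and the pass-through auth middleware are illustrative.

```go
package main

import "github.com/gofiber/fiber/v2"

// registerHealthRoutes mirrors the Register*Routes shape used above:
// it receives the app plus shared dependencies and wires its own routes.
func registerHealthRoutes(app *fiber.App, auth func(*fiber.Ctx) error) {
	ok := func(c *fiber.Ctx) error { return c.SendStatus(200) }
	app.Get("/healthz", auth, ok)
	app.Get("/readyz", auth, ok)
}

func main() {
	app := fiber.New()
	// A pass-through middleware stands in for the real auth handler.
	noAuth := func(c *fiber.Ctx) error { return c.Next() }
	registerHealthRoutes(app, noAuth)
	_ = app.Listen(":8080")
}
```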
From 180cd4ccda0753ef1afb2eb07857ec0534ea3366 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 21 Apr 2024 16:34:00 +0200
Subject: [PATCH 0193/2750] fix(llama.cpp-ggml): fixup `max_tokens` for old
backend (#2094)
fix(llama.cpp-ggml): set 0 as default for `max_tokens`
Signed-off-by: Ettore Di Giacinto
---
core/config/backend_config.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 6ca24afa..dfc216dc 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultMirostatETA := 0.1
defaultTypicalP := 1.0
defaultTFZ := 1.0
- defaultInfinity := -1
+ defaultZero := 0
// Try to offload all GPU layers (if GPU is found)
defaultHigh := 99999999
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
}
if cfg.Maxtokens == nil {
- cfg.Maxtokens = &defaultInfinity
+ cfg.Maxtokens = &defaultZero
}
if cfg.Mirostat == nil {
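The fix leans on the nil-pointer convention used throughout `SetDefaults`: a nil field means the user supplied no value, so the default applies only then, and `0` tells the old ggml backend to pick its own limit. A minimal sketch of the pattern follows; the type and field names are illustrative.

```go
package main

import "fmt"

type backendConfig struct {
	// A nil pointer distinguishes "unset" from an explicit 0.
	Maxtokens *int
}

func (c *backendConfig) setDefaults() {
	defaultZero := 0
	if c.Maxtokens == nil {
		c.Maxtokens = &defaultZero
	}
}

func main() {
	c := &backendConfig{}
	c.setDefaults()
	fmt.Println(*c.Maxtokens) // 0: let the backend decide the limit
}
```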
From 39814cab32a19fa4a6b88935d4587c6c6bbebe16 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 21 Apr 2024 16:46:13 +0200
Subject: [PATCH 0194/2750] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 4c2f68b2..e28e3cb0 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+- llama3: https://github.com/mudler/LocalAI/discussions/2076
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
From 66b002458db4ec93133d066326a63585ba236412 Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Sun, 21 Apr 2024 18:20:25 +0200
Subject: [PATCH 0195/2750] Transformer Backend: Implementing
use_tokenizer_template and stop_prompts options (#2090)
* fix regression #1971
fixes regression #1971 introduced by intel_extension_for_transformers==1.4
* UseTokenizerTemplate and StopPrompt
Implementation of use_tokenizer_template and stopwords options
---
.../transformers/transformers_server.py | 22 +++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index c7f1cd75..1b38a956 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -148,7 +148,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
else:
device_map="CPU"
self.model = OVModelForCausalLM.from_pretrained(model_name,
- compile=True,
+ compile=True,
+ ov_config={"PERFORMANCE_HINT": "LATENCY"},
device=device_map)
self.OV = True
else:
@@ -212,12 +213,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
set_seed(request.Seed)
if request.TopP == 0:
request.TopP = 0.9
+
+ if request.TopK == 0:
+ request.TopK = 40
max_tokens = 200
if request.Tokens > 0:
max_tokens = request.Tokens
- inputs = self.tokenizer(request.Prompt, return_tensors="pt")
+ prompt = request.Prompt
+ if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
+ prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
+
+ eos_token_id = self.tokenizer.eos_token_id
+ if request.StopPrompts:
+ eos_token_id = []
+ for word in request.StopPrompts:
+ eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
+
+ inputs = self.tokenizer(prompt, return_tensors="pt")
if self.CUDA:
inputs = inputs.to("cuda")
if XPU and self.OV == False:
@@ -235,7 +249,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_k=request.TopK,
do_sample=True,
attention_mask=inputs["attention_mask"],
- eos_token_id=self.tokenizer.eos_token_id,
+ eos_token_id=eos_token_id,
pad_token_id=self.tokenizer.eos_token_id,
streamer=streamer)
thread=Thread(target=self.model.generate, kwargs=config)
@@ -264,7 +278,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_k=request.TopK,
do_sample=True,
attention_mask=inputs["attention_mask"],
- eos_token_id=self.tokenizer.eos_token_id,
+ eos_token_id=eos_token_id,
pad_token_id=self.tokenizer.eos_token_id)
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
From 38c9abed8bef6cb9c9b7c29ee1b92f86e5317ec7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 21 Apr 2024 18:35:30 +0200
Subject: [PATCH 0196/2750] :arrow_up: Update ggerganov/llama.cpp (#2089)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index ea81b535..761c76d6 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5
+CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
From 228bc4903f7eed3d384e1094255a8159153158a2 Mon Sep 17 00:00:00 2001
From: Dave
Date: Sun, 21 Apr 2024 16:39:17 -0400
Subject: [PATCH 0197/2750] fix: action-tmate detached (#2092)
connect-timeout-seconds works best with `detached: true`
Signed-off-by: Dave
---
.github/workflows/test.yml | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9eb4f084..f50479e1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -123,6 +123,7 @@ jobs:
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
+ detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
@@ -177,6 +178,7 @@ jobs:
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
+ detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
@@ -213,5 +215,6 @@ jobs:
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
+ detached: true
connect-timeout-seconds: 180
- limit-access-to-actor: true
\ No newline at end of file
+ limit-access-to-actor: true
From f3f6535aad2c899afbc71b273ebd9282438b7814 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 21 Apr 2024 22:39:28 +0200
Subject: [PATCH 0198/2750] fix: rename fiber entrypoint from http/api to
http/app (#2096)
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Dave
---
core/http/{api.go => app.go} | 0
core/http/{api_test.go => app_test.go} | 0
core/http/{apt_suite_test.go => http_suite_test.go} | 0
3 files changed, 0 insertions(+), 0 deletions(-)
rename core/http/{api.go => app.go} (100%)
rename core/http/{api_test.go => app_test.go} (100%)
rename core/http/{apt_suite_test.go => http_suite_test.go} (100%)
diff --git a/core/http/api.go b/core/http/app.go
similarity index 100%
rename from core/http/api.go
rename to core/http/app.go
diff --git a/core/http/api_test.go b/core/http/app_test.go
similarity index 100%
rename from core/http/api_test.go
rename to core/http/app_test.go
diff --git a/core/http/apt_suite_test.go b/core/http/http_suite_test.go
similarity index 100%
rename from core/http/apt_suite_test.go
rename to core/http/http_suite_test.go
From 220958a87c17cf6f1c82dcb4f3f3f8756ea3881d Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine
Date: Mon, 22 Apr 2024 13:34:59 +0900
Subject: [PATCH 0199/2750] fix: typo in models.go (#2099)
---
core/cli/models.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/cli/models.go b/core/cli/models.go
index 62ef366b..6615e21d 100644
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -25,7 +25,7 @@ type ModelsInstall struct {
}
type ModelsCMD struct {
- List ModelsList `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
+ List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"`
Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
}
From 729378ca98b66ef84921c8f0eb40208e0c2721a5 Mon Sep 17 00:00:00 2001
From: jtwolfe
Date: Mon, 22 Apr 2024 23:47:51 +1000
Subject: [PATCH 0200/2750] AMD/ROCm Documentation update + formatting fix
(#2100)
* Update aio-images.md
Signed-off-by: jtwolfe
* Update aio-images.md
Signed-off-by: jtwolfe
* Update aio-images.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
* Update GPU-acceleration.md
Signed-off-by: jtwolfe
---------
Signed-off-by: jtwolfe
---
.../content/docs/features/GPU-acceleration.md | 139 +++++++++++++++++-
docs/content/docs/reference/aio-images.md | 15 +-
2 files changed, 146 insertions(+), 8 deletions(-)
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index aa931f07..b382309e 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -12,7 +12,7 @@ Section under construction
This section contains instruction on how to use LocalAI with GPU acceleration.
{{% alert icon="⚡" context="warning" %}}
-For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
+Acceleration support for AMD and Metal hardware is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
@@ -110,6 +110,143 @@ llama_model_load_internal: total VRAM used: 1598 MB
llama_init_from_file: kv self size = 512.00 MB
```
+## ROCM(AMD) acceleration
+
+There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices seem to be supported under the current ROCm 6 implementation.
+
+Due to the nature of ROCm, it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently if desired; please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
+
+### Requirements
+
+- `ROCm 6.x.x` compatible GPU/accelerator
+- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4)
+- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation.
+
+### Recommendations
+
+- Do not use on a system running Wayland.
+- If running with Xorg, do not use the GPU assigned for compute for desktop rendering.
+- Ensure at least 100GB of free space on the disk hosting the container runtime and storing images prior to installation.
+
+### Limitations
+
+Verification testing of ROCm compatibility with the integrated backends is ongoing.
+Please note the following list of verified backends and devices.
+
+### Verified
+
+The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`.
+
+| Backend | Verified | Devices |
+| ---- | ---- | ---- |
+| llama.cpp | yes | Radeon VII (gfx906) |
+| diffusers | yes | Radeon VII (gfx906) |
+| piper | yes | Radeon VII (gfx906) |
+| whisper | no | none |
+| autogptq | no | none |
+| bark | no | none |
+| coqui | no | none |
+| transformers | no | none |
+| exllama | no | none |
+| exllama2 | no | none |
+| mamba | no | none |
+| petals | no | none |
+| sentencetransformers | no | none |
+| transformers-musicgen | no | none |
+| vall-e-x | no | none |
+| vllm | no | none |
+
+**You can help by expanding this list.**
+
+### System Prep
+
+1. Check that your GPU's LLVM target is compatible with your version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
+2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the compatibility notes for [ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html) or [ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html).
+3. Install your chosen version of `amdgpu-dkms` and `rocm` (it is recommended that the native package manager be used for this process on any OS, as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details regarding this, see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)).
+4. Deploy. Yes, it's that easy.
+
+#### Setup Example (Docker/containerd)
+
+The following are examples of the ROCm-specific configuration elements required.
+
+```yaml
+# docker-compose.yaml
+ # For full functionality select a non-'core' image; version-locking the image is recommended for debugging purposes.
+ image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+ environment:
+ - DEBUG=true
+ # If your GPU is not already included in the current list of default targets, the following build details are required.
+ - REBUILD=true
+ - BUILD_TYPE=hipblas
+ - GPU_TARGETS=gfx906 # Example for Radeon VII
+ devices:
+ # AMD GPUs only require the following devices to be passed through to the container for offloading to occur.
+ - /dev/dri
+ - /dev/kfd
+```
+
+The same can also be executed as a `run` command for your container runtime:
+
+```bash
+docker run \
+ -e DEBUG=true \
+ -e REBUILD=true \
+ -e BUILD_TYPE=hipblas \
+ -e GPU_TARGETS=gfx906 \
+ --device /dev/dri \
+ --device /dev/kfd \
+ quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+```
+
+Please ensure you add all other required environment variables, port forwardings, etc. to your `compose` file or `run` command.
+
+The rebuild process will take some time to complete when deploying these containers, and it is recommended that you `pull` the image prior to deployment; depending on the version, these images may be ~20GB in size.
+
+#### Example (k8s) (Advanced Deployment/WIP)
+
+For k8s deployments there is an additional step required before deployment: the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
+For any k8s environment, the documentation provided by AMD for the ROCm project should work. If you use RKE2 or OpenShift, it is recommended that you deploy the SUSE or Red Hat provided version of this resource to ensure compatibility.
+After this has been completed, the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly unedited.
+
+The following are details of the changes that should be made to ensure proper function.
+While these details may be configurable in the `values.yaml`, development of this Helm chart is ongoing and subject to change.
+
+The following details indicate the final state of the LocalAI deployment relevant to GPU function.
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {NAME}-local-ai
+...
+spec:
+ ...
+ template:
+ ...
+ spec:
+ containers:
+ - env:
+ - name: HIP_VISIBLE_DEVICES
+ value: '0'
+ # This variable indicates the devices available to the container (0:device1 1:device2 2:device3) etc.
+ # For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
+ # Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
+ ...
+ resources:
+ limits:
+ amd.com/gpu: '1'
+ requests:
+ amd.com/gpu: '1'
+```
+
+This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing, and compatibility is not guaranteed.
+
+### Notes
+
+- When installing the ROCm kernel driver on your system, ensure that you are installing an equal or newer version than that which is currently implemented in LocalAI (6.0.0 at the time of writing).
+- AMD documentation indicates that this will ensure functionality; however, your mileage may vary depending on the GPU and distro you are using.
+- If you encounter an `Error 413` when attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annotation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
+
## Intel acceleration (sycl)
### Requirements
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 40f01f06..b5253ee4 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -9,13 +9,14 @@ All-In-One images are images that come pre-configured with a set of models and b
In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. You can find the table below
-| Category | Model name | Real model |
-| Text Generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) |
-| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) |
-| Text generation | `stablediffusion` | `stablediffusion`(CPU) `dreamshaper-8` (GPU) |
-| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model |
-| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` |
-| Embeddings | `text-embedding-ada-002` | |
+| Category | Model name | Real model (CPU) | Real model (GPU) |
+| ---- | ---- | ---- | ---- |
+| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
+| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
+| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
+| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
+| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
+| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
## Usage
From b6f0e80d54f3a0ab50688e0c391258a206f677d5 Mon Sep 17 00:00:00 2001
From: Taikono-Himazin
Date: Mon, 22 Apr 2024 23:37:13 +0900
Subject: [PATCH 0201/2750] Update text-generation.md (#2095)
Signed-off-by: Taikono-Himazin
---
docs/content/docs/features/text-generation.md | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md
index c11894e7..3f3f0b56 100644
--- a/docs/content/docs/features/text-generation.md
+++ b/docs/content/docs/features/text-generation.md
@@ -257,6 +257,10 @@ parameters:
# swap_space: 2
# Uncomment to specify the maximum length of a sequence (including prompt and output)
# max_model_len: 32768
+# Uncomment to specify the number of tensor-parallel divisions.
+# Allows you to partition and run large models across GPUs. Performance gains are limited.
+# https://github.com/vllm-project/vllm/issues/1435
+# tensor_parallel_size: 2
```
The backend will automatically download the required files in order to run the model.
@@ -356,4 +360,4 @@ template:
completion: |
{{.Input}}
-```
\ No newline at end of file
+```
From bd507678be6a45e81e1fb9f96e7620c6c4eb162f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:04:57 +0200
Subject: [PATCH 0202/2750] :arrow_up: Update docs version mudler/LocalAI
(#2105)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 6a618115..55eebaeb 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "v2.12.4"
+ "version": "null"
}
From 0d8bf91699a9deee596011cb1c30be29ec680685 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 09:22:58 +0200
Subject: [PATCH 0203/2750] feat: Galleries UI (#2104)
* WIP: add models to webui
Signed-off-by: Ettore Di Giacinto
* Register routes
Signed-off-by: Ettore Di Giacinto
* fix: don't cache models
Signed-off-by: Ettore Di Giacinto
* small fixups
Signed-off-by: Ettore Di Giacinto
* fix: fixup multiple installs (strings.Clone)
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
README.md | 2 +-
core/config/backend_config.go | 6 +-
core/http/app.go | 6 +-
core/http/elements/gallery.go | 171 +++++++++++++++++++++++++
core/http/endpoints/localai/welcome.go | 6 +-
core/http/routes/localai.go | 3 +-
core/http/routes/ui.go | 107 ++++++++++++++++
core/http/routes/welcome.go | 6 +-
core/http/views/models.html | 40 ++++++
core/http/views/partials/head.html | 67 +++++++++-
core/http/views/partials/navbar.html | 1 +
docs/content/docs/overview.md | 2 +-
go.mod | 5 +-
go.sum | 2 +
main.go | 2 +-
pkg/downloader/progress.go | 13 ++
pkg/downloader/uri.go | 4 +-
pkg/gallery/models.go | 4 +-
pkg/gallery/op.go | 5 +-
pkg/startup/model_preload.go | 2 +-
20 files changed, 431 insertions(+), 23 deletions(-)
create mode 100644 core/http/elements/gallery.go
create mode 100644 core/http/routes/ui.go
create mode 100644 core/http/views/models.html
diff --git a/README.md b/README.md
index e28e3cb0..0b32febd 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
## 🔥🔥 Hot topics / Roadmap
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index dfc216dc..64182e75 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -512,7 +512,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
for i, config := range cl.configs {
// Download files and verify their SHA
- for _, file := range config.DownloadFiles {
+ for i, file := range config.DownloadFiles {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
@@ -521,7 +521,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
- if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
return err
}
}
@@ -535,7 +535,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
- err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status)
if err != nil {
return err
}
diff --git a/core/http/app.go b/core/http/app.go
index 1061627f..21652dd9 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -186,10 +186,14 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+ galleryService := services.NewGalleryService(appConfig.ModelPath)
+ galleryService.Start(appConfig.Context, cl)
+
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
- routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth)
+ routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
+ routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
// Define a custom 404 handler
// Note: keep this at the bottom!
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go
new file mode 100644
index 00000000..370ca82d
--- /dev/null
+++ b/core/http/elements/gallery.go
@@ -0,0 +1,171 @@
+package elements
+
+import (
+ "fmt"
+
+ "github.com/chasefleming/elem-go"
+ "github.com/chasefleming/elem-go/attrs"
+ "github.com/go-skynet/LocalAI/pkg/gallery"
+)
+
+func DoneProgress(uid string) string {
+ return elem.Div(
+ attrs.Props{},
+ elem.H3(
+ attrs.Props{
+ "role": "status",
+ "id": "pblabel",
+ "tabindex": "-1",
+ "autofocus": "",
+ },
+ elem.Text("Installation completed"),
+ ),
+ ).Render()
+}
+
+func ErrorProgress(err string) string {
+ return elem.Div(
+ attrs.Props{},
+ elem.H3(
+ attrs.Props{
+ "role": "status",
+ "id": "pblabel",
+ "tabindex": "-1",
+ "autofocus": "",
+ },
+ elem.Text("Error"+err),
+ ),
+ ).Render()
+}
+
+func ProgressBar(progress string) string {
+ return elem.Div(attrs.Props{
+ "class": "progress",
+ "role": "progressbar",
+ "aria-valuemin": "0",
+ "aria-valuemax": "100",
+ "aria-valuenow": "0",
+ "aria-labelledby": "pblabel",
+ },
+ elem.Div(attrs.Props{
+ "id": "pb",
+ "class": "progress-bar",
+ "style": "width:" + progress + "%",
+ }),
+ ).Render()
+}
+
+func StartProgressBar(uid, progress string) string {
+ if progress == "" {
+ progress = "0"
+ }
+ return elem.Div(attrs.Props{
+ "hx-trigger": "done",
+ "hx-get": "/browse/job/" + uid,
+ "hx-swap": "outerHTML",
+ "hx-target": "this",
+ },
+ elem.H3(
+ attrs.Props{
+ "role": "status",
+ "id": "pblabel",
+ "tabindex": "-1",
+ "autofocus": "",
+ },
+ elem.Text("Installing"),
+ // This is a simple example of how to use the htmx library to create a progress bar that updates every 600ms.
+ elem.Div(attrs.Props{
+ "hx-get": "/browse/job/progress/" + uid,
+ "hx-trigger": "every 600ms",
+ "hx-target": "this",
+ "hx-swap": "innerHTML",
+ },
+ elem.Raw(ProgressBar(progress)),
+ ),
+ ),
+ ).Render()
+}
+
+func ListModels(models []*gallery.GalleryModel) string {
+ modelsElements := []elem.Node{}
+ span := func(s string) elem.Node {
+ return elem.Span(
+ attrs.Props{
+ "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
+ },
+ elem.Text(s),
+ )
+ }
+ installButton := func(m *gallery.GalleryModel) elem.Node {
+ return elem.Button(
+ attrs.Props{
+ "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
+ // post the Model ID as param
+ "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
+ },
+ elem.Text("Install"),
+ )
+ }
+
+ descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
+
+ return elem.Div(
+ attrs.Props{
+ "class": "p-6",
+ },
+ elem.H5(
+ attrs.Props{
+ "class": "mb-2 text-xl font-medium leading-tight",
+ },
+ elem.Text(m.Name),
+ ),
+ elem.P(
+ attrs.Props{
+ "class": "mb-4 text-base",
+ },
+ elem.Text(m.Description),
+ ),
+ )
+ }
+
+ actionDiv := func(m *gallery.GalleryModel) elem.Node {
+ return elem.Div(
+ attrs.Props{
+ "class": "px-6 pt-4 pb-2",
+ },
+ elem.Span(
+ attrs.Props{
+ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
+ },
+ elem.Text("Repository: "+m.Gallery.Name),
+ ),
+ elem.If(m.Installed, span("Installed"), installButton(m)),
+ )
+ }
+
+ for _, m := range models {
+ modelsElements = append(modelsElements,
+ elem.Div(
+ attrs.Props{
+ "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2",
+ },
+ elem.Div(
+ attrs.Props{
+ "class": "p-6",
+ },
+ descriptionDiv(m),
+ actionDiv(m),
+ // elem.If(m.Installed, span("Installed"), installButton(m)),
+
+ // elem.If(m.Installed, span("Installed"), span("Not Installed")),
+ ),
+ ),
+ )
+ }
+
+ wrapper := elem.Div(attrs.Props{
+ "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ",
+ }, modelsElements...)
+
+ return wrapper.Render()
+}
diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go
index fd3e6230..291422c6 100644
--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -3,12 +3,16 @@ package localai
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/internal"
+ "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
- models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error {
+ cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
+ models, _ := ml.ListModels()
+ backendConfigs := cl.GetAllBackendConfigs()
+
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 2651a53e..6415c894 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -14,13 +14,12 @@ func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
+ galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
app.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints
- galleryService := services.NewGalleryService(appConfig.ModelPath)
- galleryService.Start(appConfig.Context, cl)
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
new file mode 100644
index 00000000..b9ccd89a
--- /dev/null
+++ b/core/http/routes/ui.go
@@ -0,0 +1,107 @@
+package routes
+
+import (
+ "fmt"
+ "html/template"
+ "strings"
+
+ "github.com/go-skynet/LocalAI/core/config"
+ "github.com/go-skynet/LocalAI/core/http/elements"
+ "github.com/go-skynet/LocalAI/core/services"
+ "github.com/go-skynet/LocalAI/pkg/gallery"
+ "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"
+)
+
+func RegisterUIRoutes(app *fiber.App,
+ cl *config.BackendConfigLoader,
+ ml *model.ModelLoader,
+ appConfig *config.ApplicationConfig,
+ galleryService *services.GalleryService,
+ auth func(*fiber.Ctx) error) {
+
+ // Show the Models page
+ app.Get("/browse", auth, func(c *fiber.Ctx) error {
+ models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
+
+ summary := fiber.Map{
+ "Title": "LocalAI API - Models",
+ "Models": template.HTML(elements.ListModels(models)),
+ // "ApplicationConfig": appConfig,
+ }
+
+ // Render index
+ return c.Render("views/models", summary)
+ })
+
+ // HTMX: return the model details
+ // https://htmx.org/examples/active-search/
+ app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
+ form := struct {
+ Search string `form:"search"`
+ }{}
+ if err := c.BodyParser(&form); err != nil {
+ return c.Status(fiber.StatusBadRequest).SendString(err.Error())
+ }
+
+ models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
+
+ filteredModels := []*gallery.GalleryModel{}
+ for _, m := range models {
+ if strings.Contains(m.Name, form.Search) {
+ filteredModels = append(filteredModels, m)
+ }
+ }
+
+ return c.SendString(elements.ListModels(filteredModels))
+ })
+
+ // https://htmx.org/examples/progress-bar/
+ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
+ galleryID := strings.Clone(c.Params("id")) // strings.Clone is required!
+
+ id, err := uuid.NewUUID()
+ if err != nil {
+ return err
+ }
+
+ uid := id.String()
+
+ op := gallery.GalleryOp{
+ Id: uid,
+ GalleryName: galleryID,
+ Galleries: appConfig.Galleries,
+ }
+ go func() {
+ galleryService.C <- op
+ }()
+
+ return c.SendString(elements.StartProgressBar(uid, "0"))
+ })
+
+ // https://htmx.org/examples/progress-bar/
+ app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
+ jobUID := c.Params("uid")
+
+ status := galleryService.GetStatus(jobUID)
+ if status == nil {
+ //fmt.Errorf("could not find any status for ID")
+ return c.SendString(elements.ProgressBar("0"))
+ }
+
+ if status.Progress == 100 {
+ c.Set("HX-Trigger", "done")
+ return c.SendString(elements.ProgressBar("100"))
+ }
+ if status.Error != nil {
+ return c.SendString(elements.ErrorProgress(status.Error.Error()))
+ }
+
+ return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress)))
+ })
+
+ app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
+ return c.SendString(elements.DoneProgress(c.Params("uid")))
+ })
+}
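The `strings.Clone` in the install handler above deserves a word: fiber reuses its request buffers, so the string returned by `c.Params` is only valid while the handler runs, and the goroutine feeding `galleryService.C` must receive a copy that owns its memory. A minimal sketch of the hazard and the fix, assuming only the fiber v2 API; the route is illustrative.

```go
package main

import (
	"strings"

	"github.com/gofiber/fiber/v2"
)

func main() {
	app := fiber.New()
	app.Post("/jobs/:id", func(c *fiber.Ctx) error {
		// Copy before the handler returns; c.Params("id") points into
		// a buffer that fiber will reuse for the next request.
		id := strings.Clone(c.Params("id"))
		go func() { _ = id }() // safe: id owns its own backing array
		return c.SendStatus(202)
	})
	_ = app.Listen(":8080")
}
```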
diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go
index 29b9e586..6b600d2d 100644
--- a/core/http/routes/welcome.go
+++ b/core/http/routes/welcome.go
@@ -13,11 +13,7 @@ func RegisterPagesRoutes(app *fiber.App,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
- models, _ := ml.ListModels()
- backendConfigs := cl.GetAllBackendConfigs()
-
if !appConfig.DisableWelcomePage {
- app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs))
+ app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))
}
-
}
diff --git a/core/http/views/models.html b/core/http/views/models.html
new file mode 100644
index 00000000..63c6bba0
--- /dev/null
+++ b/core/http/views/models.html
@@ -0,0 +1,40 @@
+
+
+{{template "views/partials/head" .}}
+
+
+
+
+ {{template "views/partials/navbar" .}}
+
+
+
+
+
+ Available models from repositories
+
+
+
+
+
+ {{.Models}}
+
+
+
+ {{template "views/partials/footer" .}}
+
+
+
+
diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html
index 59cdea33..9dbfecdb 100644
--- a/core/http/views/partials/head.html
+++ b/core/http/views/partials/head.html
@@ -3,11 +3,76 @@
{{.Title}}
-
+
+
+
+
+
\ No newline at end of file
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index c3d3223f..36332ed2 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -9,6 +9,7 @@
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 5224bc49..f0f59494 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -56,7 +56,7 @@ icon = "info"
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
## Start LocalAI
diff --git a/go.mod b/go.mod
index 0bf9aa02..9485383e 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,8 @@
module github.com/go-skynet/LocalAI
-go 1.21
+go 1.21.1
+
+toolchain go1.22.2
require (
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
@@ -71,6 +73,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
+ github.com/chasefleming/elem-go v0.25.0 // indirect
github.com/containerd/continuity v0.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
diff --git a/go.sum b/go.sum
index 55fdaf06..b68834b2 100644
--- a/go.sum
+++ b/go.sum
@@ -37,6 +37,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps=
+github.com/chasefleming/elem-go v0.25.0 h1:LYzr1auk39Bh3bdKloArOFV7sOBnOfSOKxsg58eWL0Q=
+github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f6vg71RUilJAA4=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
diff --git a/main.go b/main.go
index 9976906b..04f13d3f 100644
--- a/main.go
+++ b/main.go
@@ -72,7 +72,7 @@ Version: ${version}
kong.Vars{
"basepath": kong.ExpandPath("."),
"remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
- "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`,
+ "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`,
"version": internal.PrintableVersion(),
},
)
diff --git a/pkg/downloader/progress.go b/pkg/downloader/progress.go
index 6806f586..6cd6132b 100644
--- a/pkg/downloader/progress.go
+++ b/pkg/downloader/progress.go
@@ -5,6 +5,8 @@ import "hash"
type progressWriter struct {
fileName string
total int64
+ fileNo int
+ totalFiles int
written int64
downloadStatus func(string, string, string, float64)
hash hash.Hash
@@ -16,6 +18,17 @@ func (pw *progressWriter) Write(p []byte) (n int, err error) {
if pw.total > 0 {
percentage := float64(pw.written) / float64(pw.total) * 100
+ if pw.totalFiles > 1 {
+ // This is a multi-file download,
+ // so we need to adjust the percentage
+ // to reflect the progress of the whole download.
+ // This is file pw.fileNo of pw.totalFiles; we assume
+ // the preceding files were already downloaded successfully.
+ percentage = percentage / float64(pw.totalFiles)
+ if pw.fileNo > 1 {
+ percentage += float64(pw.fileNo-1) * 100 / float64(pw.totalFiles)
+ }
+ }
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
} else {
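The adjustment above folds one file's progress into a whole-download percentage, assuming all preceding files completed. A minimal sketch of the same arithmetic (illustrative names, not the package API):

    // overallPercent maps one file's progress into the whole download,
    // assuming files 1..fileNo-1 already finished.
    func overallPercent(filePercent float64, fileNo, totalFiles int) float64 {
        p := filePercent / float64(totalFiles)
        if fileNo > 1 {
            p += float64(fileNo-1) * 100 / float64(totalFiles)
        }
        return p
    }

For example, file 2 of 4 at 50% reports 50/4 + 100/4 = 37.5% overall.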
diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go
index b678ae0d..46ccd6a1 100644
--- a/pkg/downloader/uri.go
+++ b/pkg/downloader/uri.go
@@ -136,7 +136,7 @@ func removePartialFile(tmpFilePath string) error {
return nil
}
-func DownloadFile(url string, filePath, sha string, downloadStatus func(string, string, string, float64)) error {
+func DownloadFile(url string, filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error {
url = ConvertURL(url)
// Check if the file already exists
_, err := os.Stat(filePath)
@@ -209,6 +209,8 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string,
fileName: tmpFilePath,
total: resp.ContentLength,
hash: sha256.New(),
+ fileNo: fileN,
+ totalFiles: total,
downloadStatus: downloadStatus,
}
_, err = io.Copy(io.MultiWriter(outFile, progress), resp.Body)
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
index 10caedee..59971bbc 100644
--- a/pkg/gallery/models.go
+++ b/pkg/gallery/models.go
@@ -102,7 +102,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
}
// Download files and verify their SHA
- for _, file := range config.Files {
+ for i, file := range config.Files {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
@@ -111,7 +111,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
// Create file path
filePath := filepath.Join(basePath, file.Filename)
- if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
return err
}
}
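DownloadFile now threads the file index and the file count through to the progress writer; single-file callers can pass 0, 0 (as model_preload.go does below). A hedged sketch of a call site with an explicit status callback (names are illustrative):

    // Report progress for the i-th of n files being downloaded.
    err := downloader.DownloadFile(uri, filePath, sha, i, n,
        func(fileName, written, total string, percent float64) {
            fmt.Printf("%s: %s/%s (%.2f%%)\n", fileName, written, total, percent)
        })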
diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go
index 99796812..73d748bf 100644
--- a/pkg/gallery/op.go
+++ b/pkg/gallery/op.go
@@ -1,11 +1,12 @@
package gallery
type GalleryOp struct {
- Req GalleryModel
Id string
- Galleries []Gallery
GalleryName string
ConfigURL string
+
+ Req GalleryModel
+ Galleries []Gallery
}
type GalleryOpStatus struct {
diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go
index b09516a7..d267d846 100644
--- a/pkg/startup/model_preload.go
+++ b/pkg/startup/model_preload.go
@@ -54,7 +54,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
- err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+ err := downloader.DownloadFile(url, modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if err != nil {
From 8e36fe9b6fc51c0a13a18302b647655b52fff0aa Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Tue, 23 Apr 2024 18:42:17 +0200
Subject: [PATCH 0204/2750] Transformers Backend: max_tokens adherence to
OpenAI API (#2108)
max_tokens adherence to the OpenAI API
Improve adherence to the OpenAI API when max_tokens is omitted or equal to 0 in the request: the budget falls back to the model's remaining context window.
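In effect the generation budget is now derived from the request and the model configuration. A minimal sketch of the rule (in Go for brevity; the backend itself is Python, and these names are illustrative, not the backend's API):

    // effectiveMaxTokens derives the generation budget.
    // requestTokens: max_tokens from the request (0 when omitted),
    // contextSize:   the configured context size (0 when unset),
    // modelMax:      the model's max_position_embeddings,
    // promptLen:     token length of the encoded prompt.
    func effectiveMaxTokens(requestTokens, contextSize, modelMax, promptLen int) int {
        limit := modelMax
        if contextSize > 0 {
            limit = contextSize
        }
        if requestTokens > 0 {
            return requestTokens
        }
        return limit - promptLen // whatever fits in the remaining window
    }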
---
.../python/transformers/transformers_server.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 1b38a956..90053ed5 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -159,6 +159,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
quantization_config=quantization,
device_map=device_map,
torch_dtype=compute)
+ if request.ContextSize > 0:
+ self.max_tokens = request.ContextSize
+ else:
+ self.max_tokens = self.model.config.max_position_embeddings
+
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
self.XPU = False
@@ -217,10 +222,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.TopK == 0:
request.TopK = 40
- max_tokens = 200
- if request.Tokens > 0:
- max_tokens = request.Tokens
-
prompt = request.Prompt
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
@@ -232,6 +233,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
inputs = self.tokenizer(prompt, return_tensors="pt")
+
+ if request.Tokens > 0:
+ max_tokens = request.Tokens
+ else:
+ max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1]
+
if self.CUDA:
inputs = inputs.to("cuda")
if XPU and self.OV == False:
From 3411e072ca8d5c4a34267287ded4a2ad03bfb36d Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Tue, 23 Apr 2024 11:43:00 -0500
Subject: [PATCH 0205/2750] Fix cleanup sonarqube findings (#2106)
* fix: update dockerignore and gitignore to exclude sonarqube work dir
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: remove useless equality check
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
* fix: use sonarqube Dockerfile recommendations
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---------
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
.dockerignore | 5 ++++-
.gitignore | 3 +++
Dockerfile | 23 +++++++++++-----------
core/http/endpoints/openai/assistant.go | 26 ++++++++++++-------------
4 files changed, 31 insertions(+), 26 deletions(-)
diff --git a/.dockerignore b/.dockerignore
index 2c394c48..ea2ea6b2 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -5,4 +5,7 @@ models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models
-Dockerfile*
\ No newline at end of file
+Dockerfile*
+
+# SonarQube
+.scannerwork
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f1f860e9..9338b0c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,6 @@ prepare
*.pb.go
*pb2.py
*pb2_grpc.py
+
+# SonarQube
+.scannerwork
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 805ac3a6..4bc8b35e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@ ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
# extras or core
-FROM ${BASE_IMAGE} as requirements-core
+FROM ${BASE_IMAGE} AS requirements-core
USER root
@@ -24,7 +24,7 @@ RUN apt-get update && \
apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
# Install Go
-RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
+RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin
# Install grpc compilers
@@ -80,7 +80,7 @@ RUN test -n "$TARGETARCH" \
###################################
###################################
-FROM requirements-core as requirements-extras
+FROM requirements-core AS requirements-extras
RUN apt install -y gpg && \
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
@@ -105,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \
###################################
###################################
-FROM ${GRPC_BASE_IMAGE} as grpc
+FROM ${GRPC_BASE_IMAGE} AS grpc
ARG MAKEFLAGS
ARG GRPC_VERSION=v1.58.0
@@ -121,16 +121,15 @@ RUN apt-get update && \
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
-RUN cd grpc && \
- mkdir -p cmake/build && \
- cd cmake/build && \
- cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
+WORKDIR /build/grpc/cmake/build
+
+RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
make
###################################
###################################
-FROM requirements-${IMAGE_TYPE} as builder
+FROM requirements-${IMAGE_TYPE} AS builder
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
@@ -168,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
COPY --from=grpc /build/grpc ./grpc/
-RUN cd /build/grpc/cmake/build && make install
+WORKDIR /build/grpc/cmake/build
+RUN make install
# Rebuild with defaults backends
+WORKDIR /build
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -288,7 +289,7 @@ RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
- CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
+ CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /build/models
EXPOSE 8080
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index dceb3789..c1efd8bd 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
for i, assistant := range Assistants {
if assistant.ID == assistantID {
for j, fileId := range assistant.FileIDs {
- if fileId == fileId {
- Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
+ Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
- // Check if the file exists in the assistantFiles slice
- for i, assistantFile := range AssistantFiles {
- if assistantFile.ID == fileId {
- // Remove the file from the assistantFiles slice
- AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
- utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
- return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
- ID: fileId,
- Object: "assistant.file.deleted",
- Deleted: true,
- })
- }
+ // Check if the file exists in the assistantFiles slice
+ for i, assistantFile := range AssistantFiles {
+ if assistantFile.ID == fileId {
+ // Remove the file from the assistantFiles slice
+ AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
+ utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
+ return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
+ ID: fileId,
+ Object: "assistant.file.deleted",
+ Deleted: true,
+ })
}
}
}
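The dropped `if fileId == fileId` guard compared a variable with itself and was therefore always true, so the file-ID removal now runs unconditionally. The deletion itself uses Go's usual remove-at-index idiom, sketched here:

    // removeAt deletes the element at index j, preserving order.
    // Like the endpoint code above, it mutates the slice's backing array.
    func removeAt(s []string, j int) []string {
        return append(s[:j], s[j+1:]...)
    }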
From d344daf129e5d4504ce29ada434b6e6b1025ce31 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 18:43:25 +0200
Subject: [PATCH 0206/2750] feat(models-ui): minor visual enhancements (#2109)
Show the model image (if present), URLs and tags, and improve the display buttons
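The new cardSpan helper in gallery.go renders an icon-plus-label badge used for the repository, license and tag chips. A usage sketch with the same elem-go calls (the values are illustrative):

    // Render a license badge inside a card footer.
    badge := cardSpan("License: apache-2.0", "fas fa-book")
    html := elem.Div(attrs.Props{"class": "px-6 pt-4 pb-2"}, badge).Render()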
Signed-off-by: Ettore Di Giacinto
---
core/http/elements/gallery.go | 96 ++++++++++++++++++++++++++++++-----
core/http/routes/ui.go | 10 ++--
core/http/views/models.html | 18 +++----
3 files changed, 96 insertions(+), 28 deletions(-)
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go
index 370ca82d..405f42ae 100644
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -86,6 +86,18 @@ func StartProgressBar(uid, progress string) string {
).Render()
}
+func cardSpan(text, icon string) elem.Node {
+ return elem.Span(
+ attrs.Props{
+ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
+ },
+ elem.I(attrs.Props{
+ "class": icon + " pr-2",
+ }),
+ elem.Text(text),
+ )
+}
+
func ListModels(models []*gallery.GalleryModel) string {
modelsElements := []elem.Node{}
span := func(s string) elem.Node {
@@ -99,10 +111,17 @@ func ListModels(models []*gallery.GalleryModel) string {
installButton := func(m *gallery.GalleryModel) elem.Node {
return elem.Button(
attrs.Props{
- "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
+ "data-twe-ripple-init": "",
+ "data-twe-ripple-color": "light",
+ "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
// post the Model ID as param
"hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
},
+ elem.I(
+ attrs.Props{
+ "class": "fa-solid fa-download pr-2",
+ },
+ ),
elem.Text("Install"),
)
}
@@ -111,7 +130,7 @@ func ListModels(models []*gallery.GalleryModel) string {
return elem.Div(
attrs.Props{
- "class": "p-6",
+ "class": "p-6 text-surface dark:text-white",
},
elem.H5(
attrs.Props{
@@ -129,42 +148,93 @@ func ListModels(models []*gallery.GalleryModel) string {
}
actionDiv := func(m *gallery.GalleryModel) elem.Node {
+ nodes := []elem.Node{
+ cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
+ }
+
+ if m.License != "" {
+ nodes = append(nodes,
+ cardSpan("License: "+m.License, "fas fa-book"),
+ )
+ }
+
+ for _, tag := range m.Tags {
+ nodes = append(nodes,
+ cardSpan(tag, "fas fa-tag"),
+ )
+ }
+
+ for i, url := range m.URLs {
+ nodes = append(nodes,
+ elem.A(
+ attrs.Props{
+ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
+ "href": url,
+ "target": "_blank",
+ },
+ elem.I(attrs.Props{
+ "class": "fas fa-link pr-2",
+ }),
+ elem.Text("Link #"+fmt.Sprintf("%d", i+1)),
+ ))
+ }
+
return elem.Div(
attrs.Props{
"class": "px-6 pt-4 pb-2",
},
- elem.Span(
+ elem.P(
attrs.Props{
- "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
+ "class": "mb-4 text-base",
},
- elem.Text("Repository: "+m.Gallery.Name),
+ nodes...,
),
elem.If(m.Installed, span("Installed"), installButton(m)),
)
}
for _, m := range models {
+
+ elems := []elem.Node{}
+
+ if m.Icon != "" {
+ elems = append(elems,
+
+ elem.Div(attrs.Props{
+ "class": "flex justify-center items-center",
+ },
+ elem.A(attrs.Props{
+ "href": "#!",
+ // "class": "justify-center items-center",
+ },
+ elem.Img(attrs.Props{
+ // "class": "rounded-t-lg object-fit object-center h-96",
+ "class": "rounded-t-lg max-h-48 max-w-96 object-cover",
+ "src": m.Icon,
+ }),
+ ),
+ ))
+ }
+
+ elems = append(elems, descriptionDiv(m), actionDiv(m))
modelsElements = append(modelsElements,
elem.Div(
attrs.Props{
- "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2",
+ "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
},
elem.Div(
attrs.Props{
- "class": "p-6",
+ // "class": "p-6",
},
- descriptionDiv(m),
- actionDiv(m),
- // elem.If(m.Installed, span("Installed"), installButton(m)),
-
- // elem.If(m.Installed, span("Installed"), span("Not Installed")),
+ elems...,
),
),
)
}
wrapper := elem.Div(attrs.Props{
- "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ",
+ "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark",
+ //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark",
}, modelsElements...)
return wrapper.Render()
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index b9ccd89a..c64ec5ff 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -26,8 +26,9 @@ func RegisterUIRoutes(app *fiber.App,
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
summary := fiber.Map{
- "Title": "LocalAI API - Models",
- "Models": template.HTML(elements.ListModels(models)),
+ "Title": "LocalAI - Models",
+ "Models": template.HTML(elements.ListModels(models)),
+ "Repositories": appConfig.Galleries,
// "ApplicationConfig": appConfig,
}
@@ -49,7 +50,10 @@ func RegisterUIRoutes(app *fiber.App,
filteredModels := []*gallery.GalleryModel{}
for _, m := range models {
- if strings.Contains(m.Name, form.Search) {
+ if strings.Contains(m.Name, form.Search) ||
+ strings.Contains(m.Description, form.Search) ||
+ strings.Contains(m.Gallery.Name, form.Search) ||
+ strings.Contains(strings.Join(m.Tags, ","), form.Search) {
filteredModels = append(filteredModels, m)
}
}
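The browse search now matches against the description, gallery name and tags as well as the model name. The same predicate, pulled out as a helper for clarity (the handler inlines it):

    func matchesSearch(m *gallery.GalleryModel, q string) bool {
        return strings.Contains(m.Name, q) ||
            strings.Contains(m.Description, q) ||
            strings.Contains(m.Gallery.Name, q) ||
            strings.Contains(strings.Join(m.Tags, ","), q)
    }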
diff --git a/core/http/views/models.html b/core/http/views/models.html
index 63c6bba0..be3c1bef 100644
--- a/core/http/views/models.html
+++ b/core/http/views/models.html
@@ -7,20 +7,14 @@
{{template "views/partials/navbar" .}}
-
-
Available models from repositories
-
+
+ 🖼️ Available models from {{ len .Repositories }} repositories
+
+
+
+
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 19:35:45 +0200
Subject: [PATCH 0207/2750] feat(gallery): add llama3, hermes, phi-3, and
others (#2110)
Also adds embeddings and llava models
Signed-off-by: Ettore Di Giacinto
---
gallery/codellama.yaml | 21 +
gallery/dreamshaper.yaml | 26 ++
gallery/hermes-2-pro-mistral.yaml | 81 ++++
gallery/index.yaml | 668 ++++++++++++++++++++----------
gallery/llama3-instruct.yaml | 64 +++
gallery/llava.yaml | 32 ++
gallery/phi-2-chat.yaml | 50 +++
gallery/phi-2-orange.yaml | 33 ++
gallery/phi-3-chat.yaml | 31 ++
gallery/piper.yaml | 15 +
gallery/sentencetransformers.yaml | 12 +
11 files changed, 804 insertions(+), 229 deletions(-)
create mode 100644 gallery/codellama.yaml
create mode 100644 gallery/dreamshaper.yaml
create mode 100644 gallery/hermes-2-pro-mistral.yaml
create mode 100644 gallery/llama3-instruct.yaml
create mode 100644 gallery/llava.yaml
create mode 100644 gallery/phi-2-chat.yaml
create mode 100644 gallery/phi-2-orange.yaml
create mode 100644 gallery/phi-3-chat.yaml
create mode 100644 gallery/piper.yaml
create mode 100644 gallery/sentencetransformers.yaml
diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml
new file mode 100644
index 00000000..1b773ed6
--- /dev/null
+++ b/gallery/codellama.yaml
@@ -0,0 +1,21 @@
+name: "codellama"
+license: llama2
+
+description: |
+ Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding.
+
+urls:
+- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF
+- https://huggingface.co/meta-llama/CodeLlama-7b-hf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ backend: llama-cpp
+ context_size: 4096
+ f16: true
+ mmap: true
\ No newline at end of file
diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml
new file mode 100644
index 00000000..894ae0cf
--- /dev/null
+++ b/gallery/dreamshaper.yaml
@@ -0,0 +1,26 @@
+name: "dreamshaper"
+icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+license: other
+
+description: |
+ A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This is the DreamShaper model by Lykon.
+
+urls:
+- https://civitai.com/models/4384/dreamshaper
+
+tags:
+- text-to-image
+- stablediffusion
+- sd-1.5
+- gpu
+
+config_file: |
+ backend: diffusers
+ step: 25
+ f16: true
+
+ diffusers:
+ pipeline_type: StableDiffusionPipeline
+ cuda: true
+ enable_parameters: "negative_prompt,num_inference_steps"
+ scheduler_type: "k_dpmpp_2m"
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
new file mode 100644
index 00000000..5a79d5cb
--- /dev/null
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -0,0 +1,81 @@
+name: "hermes-2-pro-mistral"
+icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+license: apache-2.0
+
+description: |
+ Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.
+
+ This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation.
+
+ Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below.
+
+ This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI
+
+ Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main
+
+urls:
+- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ mmap: true
+ parameters:
+ model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+ template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
+ {{- end }}
+ <|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+ For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+ <tool_call>
+ {'arguments': <args-dict>, 'name': <function-name>}
+ </tool_call>
+ <|im_end|>
+ {{.Input -}}
+ <|im_start|>assistant
+
+ chat: |
+ {{.Input -}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+ context_size: 4096
+ f16: true
+ stopwords:
+ - <|im_end|>
+ - <dummy32000>
+ - "\n</tool_call>"
+ - "\n\n\n"
+
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 6b882768..4582838e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,503 +1,713 @@
+## LLM
+
+### START LLAMA3
+- &llama3
+ url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
+ name: "llama3-8b-instruct"
+ overrides:
+ parameters:
+ model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+ files:
+ - filename: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+ sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787
+ uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+### START LLaVa
+- &llava
+ url: "github:mudler/LocalAI/gallery/llava.yaml@master"
+ name: "llava-1.6-vicuna"
+ overrides:
+ mmproj: mmproj-vicuna7b-f16.gguf
+ parameters:
+ model: vicuna-7b-q5_k.gguf
+ files:
+ - filename: vicuna-7b-q5_k.gguf
+ uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
+ - filename: mmproj-vicuna7b-f16.gguf
+ uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
+- <<: *llava
+ name: "llava-1.6-mistral"
+ overrides:
+ mmproj: llava-v1.6-7b-mmproj-f16.gguf
+ parameters:
+ model: llava-v1.6-mistral-7b.gguf
+ files:
+ - filename: llava-v1.6-mistral-7b.gguf
+ sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
+ - filename: llava-v1.6-7b-mmproj-f16.gguf
+ sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16
+ uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+- <<: *llava
+ name: "llava-1.5"
+ overrides:
+ mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
+ parameters:
+ model: llava-v1.5-7b-Q4_K.gguf
+ files:
+ - filename: llava-v1.5-7b-Q4_K.gguf
+ sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9
+ uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
+ - filename: llava-v1.5-7b-mmproj-Q8_0.gguf
+ sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a
+ uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
+### START Phi-2
+- &phi-2
+ url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
+ name: "phi-2-chat:Q8_0"
+ overrides:
+ parameters:
+ model: phi-2-layla-v1-chatml-Q8_0.gguf
+ files:
+ - filename: "phi-2-layla-v1-chatml-Q8_0.gguf"
+ sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0"
+ uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf"
+- <<: *phi-2
+ name: "phi-2-chat"
+ overrides:
+ parameters:
+ model: phi-2-layla-v1-chatml-Q4_K.gguf
+ files:
+ - filename: "phi-2-layla-v1-chatml-Q4_K.gguf"
+ sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48"
+ uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf"
+- <<: *phi-2
+ name: "phi-2-orange"
+ overrides:
+ parameters:
+ model: phi-2-orange.Q4_0.gguf
+ files:
+ - filename: "phi-2-orange.Q4_0.gguf"
+ sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
+ uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
+### START Phi-3
+- &phi-3
+ url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
+ name: "phi-3-mini-4k-instruct"
+ overrides:
+ parameters:
+ model: Phi-3-mini-4k-instruct-q4.gguf
+ files:
+ - filename: "Phi-3-mini-4k-instruct-q4.gguf"
+ sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e"
+ uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
+- <<: *phi-3
+ name: "phi-3-mini-4k-instruct:fp16"
+ overrides:
+ parameters:
+ model: Phi-3-mini-4k-instruct-fp16.gguf
+ files:
+ - filename: "Phi-3-mini-4k-instruct-fp16.gguf"
+ sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605"
+ uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf"
+### START Hermes-2-Pro-Mistral
+- &hermes-2-pro-mistral
+ url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
+ name: "hermes-2-pro-mistral"
+ overrides:
+ parameters:
+ model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf
+ files:
+ - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+ sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745"
+ uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+- <<: *hermes-2-pro-mistral
+ name: "hermes-2-pro-mistral:Q6_K"
+ overrides:
+ parameters:
+ model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+ files:
+ - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+ sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff"
+ uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+- <<: *hermes-2-pro-mistral
+ name: "hermes-2-pro-mistral"
+ overrides:
+ parameters:
+ model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf
+ files:
+ - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+ sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca"
+ uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+### END Hermes-2-Pro-Mistral
+
+### START Codellama
+- &codellama
+ url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+ name: "codellama-7b"
+ overrides:
+ parameters:
+ model: codellama-7b.Q4_0.gguf
+ files:
+ - filename: "codellama-7b.Q4_0.gguf"
+ sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5"
+ uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf"
+
+### START Embeddings
+- &sentencentransformers
+ name: "all-MiniLM-L6-v2"
+ url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master"
+ overrides:
+ parameters:
+ model: all-MiniLM-L6-v2
+
+### START Image generation
+- &diffusers
+ name: dreamshaper
+ url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master"
+ overrides:
+ parameters:
+ model: DreamShaper_8_pruned.safetensors
+ files:
+ - filename: DreamShaper_8_pruned.safetensors
+ uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+ sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
+
## Whisper
- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
- license: other
+
## Bert embeddings
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
name: "bert-embeddings"
- license: other
+
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
name: "text-embedding-ada-002"
- license: other
+
## Stable Diffusion
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
name: stablediffusion
- license: other
+
## Tiny Dream
- url: github:mudler/LocalAI/gallery/tinydream.yaml@master
name: tinydream
- license: other
+
## Piper TTS
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: en-us-kathleen-low.onnx
files:
- filename: voice-en-us-kathleen-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ca-upc_ona-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: ca-upc_ona-x-low.onnx
files:
- filename: voice-ca-upc_ona-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ca-upc_pau-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: ca-upc_pau-x-low.onnx
files:
- filename: voice-ca-upc_pau-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-da-nst_talesyntese-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: da-nst_talesyntese-medium.onnx
files:
- filename: voice-da-nst_talesyntese-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-eva_k-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: de-eva_k-x-low.onnx
files:
- filename: voice-de-eva_k-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-karlsson-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: de-karlsson-low.onnx
files:
- filename: voice-de-karlsson-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-kerstin-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: de-kerstin-low.onnx
files:
- filename: voice-de-kerstin-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-pavoque-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: de-pavoque-low.onnx
files:
- filename: voice-de-pavoque-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-ramona-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: de-ramona-low.onnx
files:
- filename: voice-de-ramona-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-thorsten-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: de-thorsten-low.onnx
files:
- filename: voice-de-thorsten-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-el-gr-rapunzelina-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: el-gr-rapunzelina-low.onnx
files:
- filename: voice-el-gr-rapunzelina-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-gb-alan-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-gb-alan-low.onnx
files:
- filename: voice-en-gb-alan-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-gb-southern_english_female-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-gb-southern_english_female-low.onnx
files:
- filename: voice-en-gb-southern_english_female-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-amy-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-amy-low.onnx
files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-danny-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-danny-low.onnx
files:
- filename: voice-en-us-danny-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-kathleen-low.onnx
files:
- filename: voice-en-us-kathleen-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-lessac-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-lessac-low.onnx
files:
- filename: voice-en-us-lessac-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-lessac-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-lessac-medium.onnx
files:
- filename: voice-en-us-lessac-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-libritts-high
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-libritts-high.onnx
files:
- filename: voice-en-us-libritts-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-high
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-ryan-high.onnx
files:
- filename: voice-en-us-ryan-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-ryan-low.onnx
files:
- filename: voice-en-us-ryan-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: en-us-ryan-medium.onnx
files:
- filename: voice-en-us-ryan-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us_lessac
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: en-us-lessac.onnx
files:
- filename: voice-en-us_lessac.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-carlfm-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+ override:
+ parameters:
+ model: es-carlfm-x-low.onnx
files:
- filename: voice-es-carlfm-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-mls_10246-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: es-mls_10246-low.onnx
files:
- filename: voice-es-mls_10246-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-mls_9972-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: es-mls_9972-low.onnx
files:
- filename: voice-es-mls_9972-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fi-harri-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: fi-harri-low.onnx
files:
- filename: voice-fi-harri-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-gilles-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: fr-gilles-low.onnx
files:
- filename: voice-fr-gilles-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-mls_1840-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: fr-mls_1840-low.onnx
files:
- filename: voice-fr-mls_1840-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-siwis-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: fr-siwis-low.onnx
files:
- filename: voice-fr-siwis-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-siwis-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: fr-siwis-medium.onnx
files:
- filename: voice-fr-siwis-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-bui-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: is-bui-medium.onnx
files:
- filename: voice-is-bui-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-salka-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: is-salka-medium.onnx
files:
- filename: voice-is-salka-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-steinn-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: is-steinn-medium.onnx
files:
- filename: voice-is-steinn-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-ugla-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: is-ugla-medium.onnx
files:
- filename: voice-is-ugla-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-it-riccardo_fasol-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: it-riccardo_fasol-x-low.onnx
files:
- filename: voice-it-riccardo_fasol-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-iseke-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: kk-iseke-x-low.onnx
files:
- filename: voice-kk-iseke-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-issai-high
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: kk-issai-high.onnx
files:
- filename: voice-kk-issai-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-raya-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: kk-raya-x-low.onnx
files:
- filename: voice-kk-raya-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ne-google-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: ne-google-medium.onnx
files:
- filename: voice-ne-google-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ne-google-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: ne-google-x-low.onnx
files:
- filename: voice-ne-google-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-mls_5809-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: nl-mls_5809-low.onnx
files:
- filename: voice-nl-mls_5809-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-mls_7432-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: nl-mls_7432-low.onnx
files:
- filename: voice-nl-mls_7432-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-nathalie-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: nl-nathalie-x-low.onnx
files:
- filename: voice-nl-nathalie-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-rdh-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: nl-rdh-medium.onnx
files:
- filename: voice-nl-rdh-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-rdh-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: nl-rdh-x-low.onnx
files:
- filename: voice-nl-rdh-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-no-talesyntese-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: no-talesyntese-medium.onnx
files:
- filename: voice-no-talesyntese-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-pl-mls_6892-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: pl-mls_6892-low.onnx
files:
- filename: voice-pl-mls_6892-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-pt-br-edresson-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: pt-br-edresson-low.onnx
files:
- filename: voice-pt-br-edresson-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ru-irinia-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: ru-irinia-medium.onnx
files:
- filename: voice-ru-irinia-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-sv-se-nst-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: sv-se-nst-medium.onnx
files:
- filename: voice-sv-se-nst-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-uk-lada-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: uk-lada-x-low.onnx
files:
- filename: voice-uk-lada-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-vi-25hours-single-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: vi-25hours-single-low.onnx
files:
- filename: voice-vi-25hours-single-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-vi-vivos-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: vi-vivos-x-low.onnx
files:
- filename: voice-vi-vivos-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-zh-cn-huayan-x-low
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: zh-cn-huayan-x-low.onnx
files:
- filename: voice-zh-cn-huayan-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/virtual.yaml@master
+- url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-zh_CN-huayan-medium
- license: other
- urls:
- - https://github.com/rhasspy/piper/releases/download/v0.0.2/
+
+ override:
+ parameters:
+ model: zh_CN-huayan-medium.onnx
files:
- filename: voice-zh_CN-huayan-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
\ No newline at end of file
diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml
new file mode 100644
index 00000000..4e29e740
--- /dev/null
+++ b/gallery/llama3-instruct.yaml
@@ -0,0 +1,64 @@
+name: "llama3-instruct"
+license: llama3
+
+description: |
+ Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.
+
+ Model developers Meta
+
+ Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants.
+
+ Input Models input text only.
+
+ Output Models generate text and code only.
+
+ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+urls:
+- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ mmap: true
+ template:
+ chat_message: |
+ <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+ {{ if .FunctionCall -}}
+ Function call:
+ {{ else if eq .RoleName "tool" -}}
+ Function response:
+ {{ end -}}
+ {{ if .Content -}}
+ {{.Content -}}
+ {{ else if .FunctionCall -}}
+ {{ toJson .FunctionCall -}}
+ {{ end -}}
+ <|eot_id|>
+ function: |
+ <|start_header_id|>system<|end_header_id|>
+
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+ Function call:
+ chat: |
+ <|begin_of_text|>{{.Input }}
+ <|start_header_id|>assistant<|end_header_id|>
+ completion: |
+ {{.Input}}
+ context_size: 8192
+ f16: true
+ stopwords:
+ - <|im_end|>
+  - <dummy32000>
+ - "<|eot_id|>"
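
The chat_message, function, and chat entries above are ordinary Go text/template snippets: at request time the server renders them against per-message data such as .RoleName, .Content and .FunctionCall (plus registered helpers like toJson). A minimal, self-contained sketch of the mechanism, using only the standard library, a simplified single-line template, and assumed field values:

    package main

    import (
    	"os"
    	"text/template"
    )

    // message mirrors the fields the templates above reference; the real
    // LocalAI message type also carries function-call data and extra helpers.
    type message struct {
    	RoleName string
    	Content  string
    }

    func main() {
    	// Simplified: the real chat_message template also inserts a blank
    	// line between the header tokens and the message body.
    	const chatMessage = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else}}user{{end}}<|end_header_id|>{{ .Content }}<|eot_id|>`
    	t := template.Must(template.New("chat_message").Parse(chatMessage))
    	// Wraps the message body in the Llama 3 header/footer tokens.
    	_ = t.Execute(os.Stdout, message{RoleName: "user", Content: "Hello!"})
    }
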
diff --git a/gallery/llava.yaml b/gallery/llava.yaml
new file mode 100644
index 00000000..159ae34c
--- /dev/null
+++ b/gallery/llava.yaml
@@ -0,0 +1,32 @@
+name: "llava"
+license: apache-2.0
+
+description: |
+ LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA.
+
+urls:
+- https://llava-vl.github.io/
+
+tags:
+- llm
+- multimodal
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ backend: llama-cpp
+ context_size: 4096
+ f16: true
+
+ mmap: true
+ roles:
+ user: "USER:"
+ assistant: "ASSISTANT:"
+ system: "SYSTEM:"
+
+ template:
+ chat: |
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}}
+ ASSISTANT:
diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml
new file mode 100644
index 00000000..3370311f
--- /dev/null
+++ b/gallery/phi-2-chat.yaml
@@ -0,0 +1,50 @@
+name: "phi-2-chatml"
+license: mit
+
+description: |
+  Phi-2 fine-tuned on the OpenHermes 2.5 dataset, optimised for multi-turn conversation and character impersonation.
+
+ The dataset has been pre-processed by doing the following:
+
+ - remove all refusals
+ - remove any mention of AI assistant
+  - split any multi-turn dialog generated in the dataset into multi-turn conversation records
+  - added nsfw generated conversations from the Teatime dataset
+
+ Developed by: l3utterfly
+ Funded by: Layla Network
+ Model type: Phi
+ Language(s) (NLP): English
+ License: MIT
+ Finetuned from model: Phi-2
+
+
+urls:
+- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml
+- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ mmap: true
+ # parameters:
+ # model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+
+ template:
+ chat_message: |
+ <|im_start|>{{ .RoleName }}
+ {{.Content}}<|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+ context_size: 4096
+ f16: true
+ stopwords:
+ - <|im_end|>
+
diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml
new file mode 100644
index 00000000..9800f8da
--- /dev/null
+++ b/gallery/phi-2-orange.yaml
@@ -0,0 +1,33 @@
+name: "phi-2-orange"
+license: mit
+icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg"
+description: |
+ A two-step finetune of Phi-2, with a bit of zest.
+
+ There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test.
+urls:
+- https://huggingface.co/rhysjones/phi-2-orange
+- https://huggingface.co/TheBloke/phi-2-orange-GGUF
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ mmap: true
+ template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}<|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+ context_size: 4096
+ f16: true
+ stopwords:
+ - <|im_end|>
+  - <dummy32000>
diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml
new file mode 100644
index 00000000..24dbc20f
--- /dev/null
+++ b/gallery/phi-3-chat.yaml
@@ -0,0 +1,31 @@
+name: "phi-3-chat"
+license: mit
+
+description: |
+  The Phi-3-Mini-4K-Instruct is a 3.8B-parameter, lightweight, state-of-the-art open model trained with the Phi-3 datasets, which include both synthetic data and filtered publicly available website data, with a focus on high-quality and reasoning-dense properties. The model belongs to the Phi-3 family; the Mini version comes in two variants, 4K and 128K, which is the context length (in tokens) it can support. The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context, and logical reasoning, Phi-3 Mini-4K-Instruct showcased robust, state-of-the-art performance among models with fewer than 13 billion parameters.
+
+urls:
+- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+ mmap: true
+ template:
+ chat_message: |
+ <|{{ .RoleName }}|>
+ {{.Content}}<|end|>
+ chat: |
+ {{.Input}}
+ <|assistant|>
+ completion: |
+ {{.Input}}
+ context_size: 4096
+ f16: true
+ stopwords:
+ - <|end|>
+
diff --git a/gallery/piper.yaml b/gallery/piper.yaml
new file mode 100644
index 00000000..d759ba92
--- /dev/null
+++ b/gallery/piper.yaml
@@ -0,0 +1,15 @@
+config_file: |
+ backend: piper
+icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
+license: mit
+
+urls:
+ - https://github.com/rhasspy/piper
+
+description: |
+ A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper).
+
+tags:
+- tts
+- text-to-speech
+- cpu
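
One detail worth noting: config_file holds the backend configuration as an embedded YAML string (the | literal block), so a consumer decodes a gallery file in two passes, first the metadata, then the embedded config. A rough sketch with gopkg.in/yaml.v3 (illustrative only, not LocalAI's actual loader):

    package main

    import (
    	"fmt"

    	"gopkg.in/yaml.v3"
    )

    const galleryFile = "config_file: |\n  backend: piper\nlicense: mit\n"

    func main() {
    	// Pass 1: decode the gallery metadata; config_file stays a string.
    	var meta struct {
    		ConfigFile string `yaml:"config_file"`
    		License    string `yaml:"license"`
    	}
    	if err := yaml.Unmarshal([]byte(galleryFile), &meta); err != nil {
    		panic(err)
    	}
    	// Pass 2: decode the embedded backend configuration itself.
    	var cfg map[string]any
    	if err := yaml.Unmarshal([]byte(meta.ConfigFile), &cfg); err != nil {
    		panic(err)
    	}
    	fmt.Println(meta.License, cfg["backend"]) // mit piper
    }
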
diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml
new file mode 100644
index 00000000..1830cce3
--- /dev/null
+++ b/gallery/sentencetransformers.yaml
@@ -0,0 +1,12 @@
+name: "sentencetransformers"
+description: |
+  This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can be found efficiently using cosine similarity.
+urls:
+- https://github.com/UKPLab/sentence-transformers
+tags:
+- gpu
+- cpu
+- embeddings
+
+config_file: |
+ backend: sentencetransformers
\ No newline at end of file
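
Taken together, these base files carry only the reusable backend configuration; each entry in gallery/index.yaml points at one of them via url: and then specializes it through its override: block (for the piper voices, just parameters.model). A hedged sketch of that composition, assuming a simple shallow merge rather than LocalAI's actual resolution logic:

    package main

    import (
    	"fmt"

    	"gopkg.in/yaml.v3"
    )

    // base stands in for the config carried by gallery/piper.yaml;
    // override stands in for one index.yaml entry's override block.
    const (
    	base     = "backend: piper\n"
    	override = "parameters:\n  model: vi-25hours-single-low.onnx\n"
    )

    func main() {
    	cfg := map[string]any{}
    	if err := yaml.Unmarshal([]byte(base), &cfg); err != nil {
    		panic(err)
    	}
    	ovr := map[string]any{}
    	if err := yaml.Unmarshal([]byte(override), &ovr); err != nil {
    		panic(err)
    	}
    	// Shallow merge: keys set by the entry win over the base config.
    	for k, v := range ovr {
    		cfg[k] = v
    	}
    	fmt.Println(cfg) // map[backend:piper parameters:map[model:vi-25hours-single-low.onnx]]
    }
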
From 55778b35fff7909927e7699a8232eceec0f5c340 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 19:47:42 +0200
Subject: [PATCH 0208/2750] fix(gallery): move metadata where it belongs
Signed-off-by: Ettore Di Giacinto
---
gallery/bert-embeddings.yaml | 6 +-
gallery/codellama.yaml | 14 --
gallery/dreamshaper.yaml | 13 --
gallery/hermes-2-pro-mistral.yaml | 21 --
gallery/index.yaml | 375 ++++++++++++++++++++++++------
gallery/llama3-instruct.yaml | 21 --
gallery/llava.yaml | 13 --
gallery/phi-2-chat.yaml | 31 ---
gallery/phi-2-orange.yaml | 15 --
gallery/phi-3-chat.yaml | 13 --
gallery/piper.yaml | 13 --
gallery/sentencetransformers.yaml | 8 -
gallery/stablediffusion.yaml | 6 -
gallery/tinydream.yaml | 6 -
gallery/whisper-base.yaml | 6 -
15 files changed, 310 insertions(+), 251 deletions(-)
diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml
index 0798bf54..01f05f33 100644
--- a/gallery/bert-embeddings.yaml
+++ b/gallery/bert-embeddings.yaml
@@ -1,9 +1,5 @@
name: "bert-embeddings"
-license: "Apache 2.0"
-urls:
-- https://huggingface.co/skeskinen/ggml
-description: |
- Bert model that can be used for embeddings
+
config_file: |
parameters:
model: bert-MiniLM-L6-v2q4_0.bin
diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml
index 1b773ed6..a4c3233f 100644
--- a/gallery/codellama.yaml
+++ b/gallery/codellama.yaml
@@ -1,18 +1,4 @@
name: "codellama"
-license: llama2
-
-description: |
- Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding.
-
-urls:
-- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF
-- https://huggingface.co/meta-llama/CodeLlama-7b-hf
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
backend: llama-cpp
diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml
index 894ae0cf..219a1e53 100644
--- a/gallery/dreamshaper.yaml
+++ b/gallery/dreamshaper.yaml
@@ -1,18 +1,5 @@
name: "dreamshaper"
-icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
-license: other
-description: |
- A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon.
-
-urls:
-- https://civitai.com/models/4384/dreamshaper
-
-tags:
-- text-to-image
-- stablediffusion
-- sd-1.5
-- gpu
config_file: |
backend: diffusers
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
index 5a79d5cb..d4771a11 100644
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -1,26 +1,5 @@
name: "hermes-2-pro-mistral"
-icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
-license: apache-2.0
-description: |
- Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.
-
- This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation.
-
- Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below.
-
- This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI
-
- Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main
-
-urls:
-- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
mmap: true
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 4582838e..bb1c5250 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4,6 +4,28 @@
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
name: "llama3-8b-instruct"
+ license: llama3
+
+ description: |
+ Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.
+
+ Model developers Meta
+
+ Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants.
+
+ Input Models input text only.
+
+ Output Models generate text and code only.
+
+ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+ urls:
+ - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
@@ -15,6 +37,20 @@
### START LLaVa
- &llava
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
+ license: apache-2.0
+
+ description: |
+ LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA.
+
+ urls:
+ - https://llava-vl.github.io/
+
+ tags:
+ - llm
+ - multimodal
+ - gguf
+ - gpu
+ - cpu
name: "llava-1.6-vicuna"
overrides:
mmproj: mmproj-vicuna7b-f16.gguf
@@ -52,8 +88,36 @@
sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a
uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
### START Phi-2
-- &phi-2
+- &phi-2-chat
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
+ license: mit
+
+ description: |
+    Phi-2 fine-tuned on the OpenHermes 2.5 dataset, optimised for multi-turn conversation and character impersonation.
+
+ The dataset has been pre-processed by doing the following:
+
+ - remove all refusals
+ - remove any mention of AI assistant
+    - split any multi-turn dialog generated in the dataset into multi-turn conversation records
+    - added nsfw generated conversations from the Teatime dataset
+
+ Developed by: l3utterfly
+ Funded by: Layla Network
+ Model type: Phi
+ Language(s) (NLP): English
+ License: MIT
+ Finetuned from model: Phi-2
+
+ urls:
+ - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml
+ - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
name: "phi-2-chat:Q8_0"
overrides:
parameters:
@@ -62,7 +126,7 @@
- filename: "phi-2-layla-v1-chatml-Q8_0.gguf"
sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0"
uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf"
-- <<: *phi-2
+- <<: *phi-2-chat
name: "phi-2-chat"
overrides:
parameters:
@@ -71,7 +135,22 @@
- filename: "phi-2-layla-v1-chatml-Q4_K.gguf"
sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48"
uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf"
-- <<: *phi-2
+- <<: *phi-2-chat
+ license: mit
+ icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg"
+ description: |
+ A two-step finetune of Phi-2, with a bit of zest.
+
+ There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test.
+ urls:
+ - https://huggingface.co/rhysjones/phi-2-orange
+ - https://huggingface.co/TheBloke/phi-2-orange-GGUF
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
name: "phi-2-orange"
overrides:
parameters:
@@ -84,6 +163,19 @@
- &phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
name: "phi-3-mini-4k-instruct"
+ license: mit
+
+ description: |
+    The Phi-3-Mini-4K-Instruct is a 3.8B-parameter, lightweight, state-of-the-art open model trained with the Phi-3 datasets, which include both synthetic data and filtered publicly available website data, with a focus on high-quality and reasoning-dense properties. The model belongs to the Phi-3 family; the Mini version comes in two variants, 4K and 128K, which is the context length (in tokens) it can support. The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context, and logical reasoning, Phi-3 Mini-4K-Instruct showcased robust, state-of-the-art performance among models with fewer than 13 billion parameters.
+
+ urls:
+ - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
overrides:
parameters:
model: Phi-3-mini-4k-instruct-q4.gguf
@@ -104,6 +196,28 @@
- &hermes-2-pro-mistral
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
name: "hermes-2-pro-mistral"
+ icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+ license: apache-2.0
+
+ description: |
+ Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.
+
+ This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation.
+
+ Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below.
+
+ This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI
+
+ Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main
+
+ urls:
+ - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
overrides:
parameters:
model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf
@@ -135,6 +249,20 @@
- &codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "codellama-7b"
+ license: llama2
+
+ description: |
+ Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding.
+
+ urls:
+ - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF
+ - https://huggingface.co/meta-llama/CodeLlama-7b-hf
+
+ tags:
+ - llm
+ - gguf
+ - gpu
+ - cpu
overrides:
parameters:
model: codellama-7b.Q4_0.gguf
@@ -145,6 +273,14 @@
### START Embeddings
- &sentencentransformers
+ description: |
+    This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can be found efficiently using cosine similarity.
+ urls:
+ - https://github.com/UKPLab/sentence-transformers
+ tags:
+ - gpu
+ - cpu
+ - embeddings
name: "all-MiniLM-L6-v2"
url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master"
overrides:
@@ -152,8 +288,22 @@
model: all-MiniLM-L6-v2
### START Image generation
-- &diffusers
+- &dreamshaper
name: dreamshaper
+ icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+ license: other
+
+ description: |
+    A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This is the DreamShaper model by Lykon.
+
+ urls:
+ - https://civitai.com/models/4384/dreamshaper
+
+ tags:
+ - text-to-image
+ - stablediffusion
+ - sd-1.5
+ - gpu
url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master"
overrides:
parameters:
@@ -166,32 +316,71 @@
## Whisper
- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
+ license: "MIT"
+ urls:
+ - https://github.com/ggerganov/whisper.cpp
+ - https://huggingface.co/ggerganov/whisper.cpp
+
+ description: |
+ Port of OpenAI's Whisper model in C/C++
## Bert embeddings
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
name: "bert-embeddings"
-
-- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
- name: "text-embedding-ada-002"
-
+ license: "Apache 2.0"
+ urls:
+ - https://huggingface.co/skeskinen/ggml
+ tags:
+ - embeddings
+ description: |
+    BERT model that can be used for embeddings
+
## Stable Diffusion
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
- name: stablediffusion
+ license: "BSD-3"
+ urls:
+ - https://github.com/EdVince/Stable-Diffusion-NCNN
+ - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
+
+ description: |
+    Stable Diffusion in NCNN with C++, supporting txt2img and img2img
+ name: stablediffusion-cpp
## Tiny Dream
- url: github:mudler/LocalAI/gallery/tinydream.yaml@master
name: tinydream
-
+ license: "BSD-3"
+ urls:
+ - https://github.com/symisc/tiny-dream
+ - https://github.com/symisc/tiny-dream/blob/main/LICENSE
+
+ description: |
+    An embedded, header-only Stable Diffusion C++ implementation
## Piper TTS
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- &piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
+ icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
+ license: mit
+
+ urls:
+ - https://github.com/rhasspy/piper
+
+ description: |
+ A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper).
+
+ tags:
+ - tts
+ - text-to-speech
+ - cpu
+
override:
parameters:
model: en-us-kathleen-low.onnx
files:
- filename: voice-en-us-kathleen-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
name: voice-ca-upc_ona-x-low
override:
parameters:
@@ -199,7 +388,8 @@
files:
- filename: voice-ca-upc_ona-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ca-upc_pau-x-low
override:
parameters:
@@ -207,7 +397,8 @@
files:
- filename: voice-ca-upc_pau-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-da-nst_talesyntese-medium
override:
parameters:
@@ -215,7 +406,8 @@
files:
- filename: voice-da-nst_talesyntese-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-eva_k-x-low
override:
parameters:
@@ -223,7 +415,8 @@
files:
- filename: voice-de-eva_k-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-karlsson-low
override:
parameters:
@@ -231,7 +424,8 @@
files:
- filename: voice-de-karlsson-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-kerstin-low
override:
parameters:
@@ -239,7 +433,8 @@
files:
- filename: voice-de-kerstin-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-pavoque-low
override:
parameters:
@@ -247,7 +442,8 @@
files:
- filename: voice-de-pavoque-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-ramona-low
override:
parameters:
@@ -255,7 +451,8 @@
files:
- filename: voice-de-ramona-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-de-thorsten-low
override:
@@ -264,7 +461,8 @@
files:
- filename: voice-de-thorsten-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-el-gr-rapunzelina-low
override:
@@ -273,7 +471,8 @@
files:
- filename: voice-el-gr-rapunzelina-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-gb-alan-low
override:
@@ -282,7 +481,8 @@
files:
- filename: voice-en-gb-alan-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-gb-southern_english_female-low
override:
@@ -291,7 +491,8 @@
files:
- filename: voice-en-gb-southern_english_female-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-amy-low
override:
@@ -300,7 +501,8 @@
files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-danny-low
override:
@@ -309,7 +511,8 @@
files:
- filename: voice-en-us-danny-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
override:
@@ -318,7 +521,8 @@
files:
- filename: voice-en-us-kathleen-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-lessac-low
override:
@@ -327,7 +531,8 @@
files:
- filename: voice-en-us-lessac-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-lessac-medium
override:
@@ -336,7 +541,8 @@
files:
- filename: voice-en-us-lessac-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-libritts-high
override:
@@ -345,7 +551,8 @@
files:
- filename: voice-en-us-libritts-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-high
override:
@@ -354,7 +561,8 @@
files:
- filename: voice-en-us-ryan-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-low
override:
@@ -364,7 +572,8 @@
- filename: voice-en-us-ryan-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-ryan-medium
override:
@@ -374,7 +583,8 @@
- filename: voice-en-us-ryan-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us_lessac
override:
parameters:
@@ -383,7 +593,8 @@
- filename: voice-en-us_lessac.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-carlfm-x-low
override:
parameters:
@@ -392,7 +603,8 @@
- filename: voice-es-carlfm-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-mls_10246-low
override:
@@ -402,7 +614,8 @@
- filename: voice-es-mls_10246-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-es-mls_9972-low
override:
@@ -412,7 +625,8 @@
- filename: voice-es-mls_9972-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fi-harri-low
override:
@@ -422,7 +636,8 @@
- filename: voice-fi-harri-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-gilles-low
override:
@@ -432,7 +647,8 @@
- filename: voice-fr-gilles-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-mls_1840-low
override:
@@ -442,7 +658,8 @@
- filename: voice-fr-mls_1840-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-siwis-low
override:
@@ -452,7 +669,8 @@
- filename: voice-fr-siwis-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-fr-siwis-medium
override:
@@ -462,7 +680,8 @@
- filename: voice-fr-siwis-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-bui-medium
override:
@@ -472,7 +691,8 @@
- filename: voice-is-bui-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-salka-medium
override:
@@ -482,7 +702,8 @@
- filename: voice-is-salka-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-steinn-medium
override:
@@ -492,7 +713,8 @@
- filename: voice-is-steinn-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-is-ugla-medium
override:
@@ -502,7 +724,8 @@
- filename: voice-is-ugla-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-it-riccardo_fasol-x-low
override:
@@ -512,7 +735,8 @@
- filename: voice-it-riccardo_fasol-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-iseke-x-low
override:
@@ -522,7 +746,8 @@
- filename: voice-kk-iseke-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-issai-high
override:
@@ -532,7 +757,8 @@
- filename: voice-kk-issai-high.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-kk-raya-x-low
override:
@@ -542,7 +768,8 @@
- filename: voice-kk-raya-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ne-google-medium
override:
@@ -552,7 +779,8 @@
- filename: voice-ne-google-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ne-google-x-low
override:
@@ -562,7 +790,8 @@
- filename: voice-ne-google-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-mls_5809-low
override:
@@ -572,7 +801,8 @@
- filename: voice-nl-mls_5809-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-mls_7432-low
override:
@@ -582,7 +812,8 @@
- filename: voice-nl-mls_7432-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-nathalie-x-low
override:
@@ -592,7 +823,8 @@
- filename: voice-nl-nathalie-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-rdh-medium
override:
@@ -602,7 +834,8 @@
- filename: voice-nl-rdh-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-nl-rdh-x-low
override:
@@ -612,7 +845,8 @@
- filename: voice-nl-rdh-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-no-talesyntese-medium
override:
@@ -622,7 +856,8 @@
- filename: voice-no-talesyntese-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-pl-mls_6892-low
override:
@@ -632,7 +867,8 @@
- filename: voice-pl-mls_6892-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-pt-br-edresson-low
override:
@@ -642,7 +878,8 @@
- filename: voice-pt-br-edresson-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-ru-irinia-medium
override:
@@ -652,7 +889,8 @@
- filename: voice-ru-irinia-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-sv-se-nst-medium
override:
@@ -662,7 +900,8 @@
- filename: voice-sv-se-nst-medium.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-uk-lada-x-low
override:
@@ -672,7 +911,8 @@
- filename: voice-uk-lada-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-vi-25hours-single-low
override:
@@ -682,7 +922,8 @@
- filename: voice-vi-25hours-single-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-vi-vivos-x-low
override:
@@ -692,7 +933,8 @@
- filename: voice-vi-vivos-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-zh-cn-huayan-x-low
override:
@@ -702,7 +944,8 @@
- filename: voice-zh-cn-huayan-x-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz
-- url: github:mudler/LocalAI/gallery/piper.yaml@master
+- <<: *piper
+ url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-zh_CN-huayan-medium
override:
diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml
index 4e29e740..96272c58 100644
--- a/gallery/llama3-instruct.yaml
+++ b/gallery/llama3-instruct.yaml
@@ -1,26 +1,5 @@
name: "llama3-instruct"
-license: llama3
-description: |
- Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.
-
- Model developers Meta
-
- Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants.
-
- Input Models input text only.
-
- Output Models generate text and code only.
-
- Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
-urls:
-- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
mmap: true
diff --git a/gallery/llava.yaml b/gallery/llava.yaml
index 159ae34c..44c1aa97 100644
--- a/gallery/llava.yaml
+++ b/gallery/llava.yaml
@@ -1,18 +1,5 @@
name: "llava"
-license: apache-2.0
-description: |
- LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA.
-
-urls:
-- https://llava-vl.github.io/
-
-tags:
-- llm
-- multimodal
-- gguf
-- gpu
-- cpu
config_file: |
backend: llama-cpp
diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml
index 3370311f..3fc84d3b 100644
--- a/gallery/phi-2-chat.yaml
+++ b/gallery/phi-2-chat.yaml
@@ -1,39 +1,8 @@
name: "phi-2-chatml"
-license: mit
-description: |
-  Phi-2 fine-tuned on the OpenHermes 2.5 dataset, optimised for multi-turn conversation and character impersonation.
-
- The dataset has been pre-processed by doing the following:
-
- - remove all refusals
- - remove any mention of AI assistant
-  - split any multi-turn dialog generated in the dataset into multi-turn conversation records
-  - added nsfw generated conversations from the Teatime dataset
-
- Developed by: l3utterfly
- Funded by: Layla Network
- Model type: Phi
- Language(s) (NLP): English
- License: MIT
- Finetuned from model: Phi-2
-
-
-urls:
-- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml
-- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
mmap: true
- # parameters:
- # model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
template:
chat_message: |
<|im_start|>{{ .RoleName }}
diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml
index 9800f8da..645875ad 100644
--- a/gallery/phi-2-orange.yaml
+++ b/gallery/phi-2-orange.yaml
@@ -1,19 +1,4 @@
name: "phi-2-orange"
-license: mit
-icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg"
-description: |
- A two-step finetune of Phi-2, with a bit of zest.
-
- There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test.
-urls:
-- https://huggingface.co/rhysjones/phi-2-orange
-- https://huggingface.co/TheBloke/phi-2-orange-GGUF
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
mmap: true
diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml
index 24dbc20f..b17e5bb4 100644
--- a/gallery/phi-3-chat.yaml
+++ b/gallery/phi-3-chat.yaml
@@ -1,17 +1,4 @@
name: "phi-3-chat"
-license: mit
-
-description: |
-  The Phi-3-Mini-4K-Instruct is a 3.8B-parameter, lightweight, state-of-the-art open model trained with the Phi-3 datasets, which include both synthetic data and filtered publicly available website data, with a focus on high-quality and reasoning-dense properties. The model belongs to the Phi-3 family; the Mini version comes in two variants, 4K and 128K, which is the context length (in tokens) it can support. The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context, and logical reasoning, Phi-3 Mini-4K-Instruct showcased robust, state-of-the-art performance among models with fewer than 13 billion parameters.
-
-urls:
-- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
-
-tags:
-- llm
-- gguf
-- gpu
-- cpu
config_file: |
mmap: true
diff --git a/gallery/piper.yaml b/gallery/piper.yaml
index d759ba92..eb1a6ecc 100644
--- a/gallery/piper.yaml
+++ b/gallery/piper.yaml
@@ -1,15 +1,2 @@
config_file: |
backend: piper
-icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
-license: mit
-
-urls:
- - https://github.com/rhasspy/piper
-
-description: |
- A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper).
-
-tags:
-- tts
-- text-to-speech
-- cpu
diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml
index 1830cce3..9ba5d29b 100644
--- a/gallery/sentencetransformers.yaml
+++ b/gallery/sentencetransformers.yaml
@@ -1,12 +1,4 @@
name: "sentencetransformers"
-description: |
-  This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can be found efficiently using cosine similarity.
-urls:
-- https://github.com/UKPLab/sentence-transformers
-tags:
-- gpu
-- cpu
-- embeddings
config_file: |
backend: sentencetransformers
\ No newline at end of file
diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml
index c8a0eb8b..9b1cad32 100644
--- a/gallery/stablediffusion.yaml
+++ b/gallery/stablediffusion.yaml
@@ -1,11 +1,5 @@
name: "stablediffusion-cpp"
-license: "BSD-3"
-urls:
-- https://github.com/EdVince/Stable-Diffusion-NCNN
-- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
-description: |
- Stable Diffusion in NCNN with c++, supported txt2img and img2img
config_file: |
name: stablediffusion-cpp
backend: stablediffusion
diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml
index 415762de..6e39414c 100644
--- a/gallery/tinydream.yaml
+++ b/gallery/tinydream.yaml
@@ -1,11 +1,5 @@
name: "tinydream"
-license: "BSD-3"
-urls:
- - https://github.com/symisc/tiny-dream
- - https://github.com/symisc/tiny-dream/blob/main/LICENSE
-description: |
- An embedded, Header Only, Stable Diffusion C++ implementation
config_file: |
name: tinydream
backend: tinydream
diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml
index 574dbb13..f654a37c 100644
--- a/gallery/whisper-base.yaml
+++ b/gallery/whisper-base.yaml
@@ -1,11 +1,5 @@
name: "whisper-base"
-license: "MIT"
-urls:
-- https://github.com/ggerganov/whisper.cpp
-- https://huggingface.co/ggerganov/whisper.cpp
-description: |
- Port of OpenAI's Whisper model in C/C++
config_file: |
backend: whisper
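
The index.yaml rewrite above leans on plain YAML anchors and merge keys: - &piper names the first voice entry, and each following - <<: *piper entry inherits its url, license, tags and description while setting its own name and override. A minimal demonstration of those semantics (gopkg.in/yaml.v3 resolves merge keys at decode time):

    package main

    import (
    	"fmt"

    	"gopkg.in/yaml.v3"
    )

    const index = "- &piper\n" +
    	"  url: github:mudler/LocalAI/gallery/piper.yaml@master\n" +
    	"  license: mit\n" +
    	"  name: voice-en-us-kathleen-low\n" +
    	"- <<: *piper\n" +
    	"  name: voice-ca-upc_ona-x-low\n"

    func main() {
    	var entries []map[string]string
    	if err := yaml.Unmarshal([]byte(index), &entries); err != nil {
    		panic(err)
    	}
    	// The second entry inherits url and license from the &piper anchor,
    	// while its locally set name overrides the inherited one.
    	fmt.Println(entries[1]["name"], entries[1]["license"])
    }
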
From a09fe1b9babcfec6e91f596b6597a4030d9552fc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 20:00:20 +0200
Subject: [PATCH 0209/2750] fix(gallery): set margin for images
Signed-off-by: Ettore Di Giacinto
---
core/http/elements/gallery.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go
index 405f42ae..f2b4f8dd 100644
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -209,7 +209,7 @@ func ListModels(models []*gallery.GalleryModel) string {
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
- "class": "rounded-t-lg max-h-48 max-w-96 object-cover",
+ "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
"src": m.Icon,
}),
),
From d2bea6f9e3c30056b5d1adcfc6dd3ff5fae560af Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 20:01:56 +0200
Subject: [PATCH 0210/2750] fix(gallery): fixup hermes q8 entry
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index bb1c5250..16916703 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -235,7 +235,7 @@
sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff"
uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
- <<: *hermes-2-pro-mistral
- name: "hermes-2-pro-mistral"
+ name: "hermes-2-pro-mistral:Q8_0"
overrides:
parameters:
model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf
From 34c3f563fd4c50162dc4e64eb4cd9265ac4afb05 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 20:05:59 +0200
Subject: [PATCH 0211/2750] fix(gallery): fixup dreamshaper icon
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 16916703..deab29cf 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -290,7 +290,7 @@
### START Image generation
- &dreamshaper
name: dreamshaper
- icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+ icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
license: other
description: |
From ac56ac2b2da3bba78122b2e80eb36afc28e51056 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 20:10:58 +0200
Subject: [PATCH 0212/2750] fix(gallery): show a fake image if there is no
icon (#2111)
Signed-off-by: Ettore Di Giacinto
---
core/http/elements/gallery.go | 40 ++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go
index f2b4f8dd..c03750da 100644
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -8,6 +8,10 @@ import (
"github.com/go-skynet/LocalAI/pkg/gallery"
)
+const (
+ NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
+)
+
func DoneProgress(uid string) string {
return elem.Div(
attrs.Props{},
@@ -197,25 +201,27 @@ func ListModels(models []*gallery.GalleryModel) string {
elems := []elem.Node{}
- if m.Icon != "" {
- elems = append(elems,
-
- elem.Div(attrs.Props{
- "class": "flex justify-center items-center",
- },
- elem.A(attrs.Props{
- "href": "#!",
- // "class": "justify-center items-center",
- },
- elem.Img(attrs.Props{
- // "class": "rounded-t-lg object-fit object-center h-96",
- "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
- "src": m.Icon,
- }),
- ),
- ))
+ if m.Icon == "" {
+ m.Icon = NoImage
}
+ elems = append(elems,
+
+ elem.Div(attrs.Props{
+ "class": "flex justify-center items-center",
+ },
+ elem.A(attrs.Props{
+ "href": "#!",
+ // "class": "justify-center items-center",
+ },
+ elem.Img(attrs.Props{
+ // "class": "rounded-t-lg object-fit object-center h-96",
+ "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
+ "src": m.Icon,
+ }),
+ ),
+ ))
+
elems = append(elems, descriptionDiv(m), actionDiv(m))
modelsElements = append(modelsElements,
elem.Div(
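
The change above inverts the old conditional: rather than skipping the image element when a model has no icon, every card now renders one, with empty icons first rewritten to a Wikimedia placeholder. Distilled to its core, the pattern is a small fallback, sketched here as a hypothetical helper (not a function that exists in the LocalAI codebase):

    // iconOrPlaceholder returns a usable image URL for a model card,
    // substituting a generic placeholder when no icon is set.
    func iconOrPlaceholder(icon string) string {
    	const noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
    	if icon == "" {
    		return noImage
    	}
    	return icon
    }
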
From f718a391c03c1b1ac870e9a083ca686613bac48f Mon Sep 17 00:00:00 2001
From: fakezeta
Date: Wed, 24 Apr 2024 02:45:37 +0200
Subject: [PATCH 0213/2750] fix missing TrustRemoteCode in OpenVINO model load
(#2114)
---
backend/python/transformers/transformers_server.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 90053ed5..2f4140c2 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -149,6 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
device_map="CPU"
self.model = OVModelForCausalLM.from_pretrained(model_name,
compile=True,
+ trust_remote_code=request.TrustRemoteCode,
ov_config={"PERFORMANCE_HINT": "LATENCY"},
device=device_map)
self.OV = True
From 2fb34b00b5c5daa1b60c46a5b535d30c5acf35fc Mon Sep 17 00:00:00 2001
From: jtwolfe
Date: Wed, 24 Apr 2024 17:17:49 +1000
Subject: [PATCH 0214/2750] Incl ocv pkg for diffusers utils (#2115)
* Update diffusers.yml
Signed-off-by: jtwolfe
* Update diffusers-rocm.yml
Signed-off-by: jtwolfe
---------
Signed-off-by: jtwolfe
---
backend/python/diffusers/diffusers-rocm.yml | 1 +
backend/python/diffusers/diffusers.yml | 1 +
2 files changed, 2 insertions(+)
diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml
index f261701d..97b2ce0f 100644
--- a/backend/python/diffusers/diffusers-rocm.yml
+++ b/backend/python/diffusers/diffusers-rocm.yml
@@ -61,4 +61,5 @@ dependencies:
- urllib3==2.0.6
- zipp==3.17.0
- torch
+ - opencv-python
prefix: /opt/conda/envs/diffusers
diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml
index b1a7d9f9..d5d2913e 100644
--- a/backend/python/diffusers/diffusers.yml
+++ b/backend/python/diffusers/diffusers.yml
@@ -71,4 +71,5 @@ dependencies:
- typing-extensions==4.8.0
- urllib3==2.0.6
- zipp==3.17.0
+ - opencv-python
prefix: /opt/conda/envs/diffusers
From d65214a234d703812dec346be9fb929579382a6b Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 24 Apr 2024 11:11:41 +0200
Subject: [PATCH 0215/2750] :arrow_up: Update docs version mudler/LocalAI
(#2113)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
docs/data/version.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/data/version.json b/docs/data/version.json
index 55eebaeb..6a618115 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
{
- "version": "null"
+ "version": "v2.12.4"
}
From 4fffc47e77db8a56bbc89fcac57e6c2ca369789e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 24 Apr 2024 18:44:04 +0200
Subject: [PATCH 0216/2750] deps(llama.cpp): update, use better model for
function call tests (#2119)
deps(llama.cpp): update
Signed-off-by: Ettore Di Giacinto
---
Makefile | 2 +-
core/http/app_test.go | 9 ++++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/Makefile b/Makefile
index 761c76d6..1923f956 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc
+CPPLLAMA_VERSION?=4e96a812b3ce7322a29a3008db2ed73d9087b176
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
diff --git a/core/http/app_test.go b/core/http/app_test.go
index 35e0a8bf..3699c0ed 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -489,11 +489,10 @@ var _ = Describe("API test", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
- modelName := "codellama"
+
+ modelName := "hermes-2-pro-mistral"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
- URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
- Name: modelName,
- Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
+ ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -556,7 +555,7 @@ var _ = Describe("API test", func() {
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
- Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
+ Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
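
Two things change in this test: the model is installed from an embedded config URL rather than a gallery entry plus overrides, and the location assertion is relaxed from Equal to ContainSubstring, since a model may legitimately answer with a variant like "San Francisco, CA". A hedged sketch of the request side (the config_url field name is assumed from the ConfigURL struct field; this is not a verbatim copy of the test helper):

    package main

    import (
    	"bytes"
    	"encoding/json"
    	"fmt"
    	"net/http"
    )

    func main() {
    	// Ask a running LocalAI instance to install a model from a raw config URL.
    	payload, _ := json.Marshal(map[string]string{
    		"config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
    	})
    	resp, err := http.Post("http://127.0.0.1:9090/models/apply",
    		"application/json", bytes.NewReader(payload))
    	if err != nil {
    		panic(err)
    	}
    	defer resp.Body.Close()

    	var out map[string]any
    	_ = json.NewDecoder(resp.Body).Decode(&out)
    	fmt.Println(out["uuid"]) // job id to poll for installation progress
    }
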
From 23eac98b3c4aa62f75fc75ddbaf6a1b81326a22f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 24 Apr 2024 19:43:07 +0200
Subject: [PATCH 0217/2750] docs: update hot topics
Signed-off-by: Ettore Di Giacinto