From aeb3f835aef7c80da7ad2ccae433d11449493061 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:07:21 +0200 Subject: [PATCH 0001/2648] :arrow_up: Update docs version mudler/LocalAI (#1978) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index b6372479..cc0478ca 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.11.0" + "version": "v2.12.1" } From 951e39d36c06bb14b3b95b27309d7be809f4a3f4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:07:41 +0200 Subject: [PATCH 0002/2648] :arrow_up: Update ggerganov/llama.cpp (#1979) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d42e1a99..b43541ff 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88 +CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 7e2f8bb4083eea3939072dab2cb47261b1b97603 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:08:00 +0200 Subject: [PATCH 0003/2648] :arrow_up: Update ggerganov/whisper.cpp (#1980) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b43541ff..337ebc64 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769 +WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From d692b2c32a400a4aa0c6df9a51aa4f3cbe73edff Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 10:31:59 +0200 Subject: [PATCH 0004/2648] ci: push latest images for dockerhub (#1984) Fixes: #1983 Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index bd244dcf..b0684a4c 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -280,6 +280,7 @@ jobs: run: | docker pull localai/localai:${{ steps.meta.outputs.version }} docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }} + docker push localai/localai:${{ inputs.latest-image }} docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }} @@ -289,6 +290,7 @@ jobs: run: | docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }} + docker push localai/localai:${{ inputs.latest-image-aio }} docker pull 
quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} From d23e73b11828b59a608174dc679eb5a3c2d2f42f Mon Sep 17 00:00:00 2001 From: "Sebastian.W" Date: Wed, 10 Apr 2024 18:36:10 +0800 Subject: [PATCH 0005/2648] fix(autogptq): do not use_triton with qwen-vl (#1985) * Enhance autogptq backend to support VL models * update dependencies for autogptq * remove redundant auto-gptq dependency * Convert base64 to image_url for Qwen-VL model * implemented model inference for qwen-vl * remove user prompt from generated answer * fixed write image error * fixed use_triton issue when loading Qwen-VL model --------- Co-authored-by: Binghua Wu --- backend/python/autogptq/autogptq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py index bbafdd92..c7c35028 100755 --- a/backend/python/autogptq/autogptq.py +++ b/backend/python/autogptq/autogptq.py @@ -39,7 +39,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.model_name = "Qwen-VL-Chat" model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=request.TrustRemoteCode, - use_triton=request.UseTriton, device_map="auto").eval() else: model = AutoGPTQForCausalLM.from_quantized(model_path, From 36da11a0ee4cdf575012c669150d5f617362e619 Mon Sep 17 00:00:00 2001 From: Koen Farell Date: Wed, 10 Apr 2024 14:25:26 +0300 Subject: [PATCH 0006/2648] deps: Update version of vLLM to add support of Cohere Command_R model in vLLM inference (#1975) * Update vLLM version to add support of Command_R Signed-off-by: Koen Farell * fix: Fixed vllm version from requirements Signed-off-by: Koen Farell * chore: Update transformers-rocm.yml Signed-off-by: Koen Farell * chore: Update transformers.yml version of vllm Signed-off-by: Koen Farell --------- Signed-off-by: Koen Farell --- backend/python/common-env/transformers/transformers-nvidia.yml | 2 +- backend/python/common-env/transformers/transformers-rocm.yml | 2 +- backend/python/common-env/transformers/transformers.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index e8d8155b..e12b5dbb 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -116,7 +116,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index fa245bf4..48fac8bf 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -104,7 +104,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 3b3b8fe7..843b13fa 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ 
b/backend/python/common-env/transformers/transformers.yml @@ -108,7 +108,7 @@ dependencies: - sudachipy - sudachidict_core - vocos - - vllm==0.3.2 + - vllm>=0.4.0 - transformers>=4.38.2 # Updated Version - transformers_stream_generator==0.0.5 - xformers==0.0.23.post1 From 93f51d80d41b3b3748da41ad4cb7baf8c762890c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:29:46 +0200 Subject: [PATCH 0007/2648] Update gpt-vision.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/gpt-vision.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 3afcab16..827e2c08 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -22,6 +22,17 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' ``` +Grammars and function tools can be used as well in conjunction with vision APIs: + +```bash + curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", + "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +``` + ### Setup +All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case. + To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI/blob/master/examples/configurations/README.md#llava). 
+ From 636d487dc84c6f1d99ba7630d8851865091c42cb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:30:03 +0200 Subject: [PATCH 0008/2648] Update gpt-vision.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/gpt-vision.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 827e2c08..9e021273 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -25,7 +25,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso Grammars and function tools can be used as well in conjunction with vision APIs: ```bash - curl http://10.1.0.36:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' ``` From 92005b9c0285f31e7f29ca4f37e6afa194745cf0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 10 Apr 2024 16:30:57 +0200 Subject: [PATCH 0009/2648] Update openai-functions.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/openai-functions.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md index 843524f4..435101ee 100644 --- a/docs/content/docs/features/openai-functions.md +++ b/docs/content/docs/features/openai-functions.md @@ -144,6 +144,15 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso }' ``` +Grammars and function tools can be used as well in conjunction with vision APIs: + +```bash + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava", "grammar": "root ::= (\"yes\" | \"no\")", + "messages": [{"role": "user", "content": [{"type":"text", "text": "Is there some grass in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +``` + + ## 💡 Examples A full e2e example with `docker-compose` is available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/functions). 
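
The documentation patches above demonstrate grammar-constrained vision requests with curl. For reference, the same call can be made from Go; the sketch below is illustrative only (it is not part of this patch series) and assumes a running LocalAI instance on localhost:8080 with the `llava` model installed, posting an equivalent JSON payload to the one shown in the docs:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Same request as the curl examples: the GBNF grammar limits the
	// model's reply to the literal strings "yes" or "no".
	payload := `{
	  "model": "llava",
	  "grammar": "root ::= (\"yes\" | \"no\")",
	  "messages": [{"role": "user", "content": [
	    {"type": "text", "text": "Is there some grass in the image?"},
	    {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}}
	  ]}],
	  "temperature": 0.9
	}`

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewBufferString(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // OpenAI-style chat completion JSON
}
```

Since the grammar admits only those two literals, the assistant message in the response should be exactly `yes` or `no`.
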
From 24d7dadfed6ddf19e91652c3eb45d04ad1d15584 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 11 Apr 2024 02:19:24 -0500 Subject: [PATCH 0010/2648] feat: kong cli refactor fixes #1955 (#1974) * feat: migrate to alecthomas/kong for CLI Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: bring in new flag for granular log levels Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * chore: go mod tidy Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: allow loading cli flag values from ["./localai.yaml", "~/.config/localai.yaml", "/etc/localai.yaml"] in that order Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: load from .env file instead of a yaml file Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: better loading for environment files Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat(doc): add initial documentation about configuration Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove test log lines Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: integrate new documentation into existing pages Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: add documentation on .env files Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: cleanup some documentation table errors Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: refactor CLI logic out to its own package under core/cli Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .env | 38 +- core/cli/cli.go | 20 + core/cli/models.go | 74 +++ core/cli/run.go | 155 +++++ core/cli/transcript.go | 54 ++ core/cli/tts.go | 61 ++ docs/content/docs/advanced/advanced-usage.md | 101 +++- go.mod | 16 +- go.sum | 61 +- main.go | 595 +++---------------- 10 files changed, 552 insertions(+), 623 deletions(-) create mode 100644 core/cli/cli.go create mode 100644 core/cli/models.go create mode 100644 core/cli/run.go create mode 100644 core/cli/transcript.go create mode 100644 core/cli/tts.go diff --git a/.env b/.env index 82a64e3d..35d4f2d7 100644 --- a/.env +++ b/.env @@ -1,33 +1,33 @@ ## Set number of threads. ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably. -# THREADS=14 +# LOCALAI_THREADS=14 ## Specify a different bind address (defaults to ":8080") -# ADDRESS=127.0.0.1:8080 +# LOCALAI_ADDRESS=127.0.0.1:8080 ## Default models context size -# CONTEXT_SIZE=512 +# LOCALAI_CONTEXT_SIZE=512 # ## Define galleries. ## Models to install will be visible in `/models/available` -# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] +# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] ## CORS settings -# CORS=true -# CORS_ALLOW_ORIGINS=* +# LOCALAI_CORS=true +# LOCALAI_CORS_ALLOW_ORIGINS=* ## Default path for models # -# MODELS_PATH=/models +# LOCALAI_MODELS_PATH=/models ## Enable debug mode -# DEBUG=true +# LOCALAI_LOG_LEVEL=debug ## Disables COMPEL (Diffusers) # COMPEL=0 ## Enable/Disable single backend (useful if only one GPU is available) -# SINGLE_ACTIVE_BACKEND=true +# LOCALAI_SINGLE_ACTIVE_BACKEND=true ## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit. @@ -46,13 +46,13 @@ # GO_TAGS=stablediffusion ## Path where to store generated images -# IMAGE_PATH=/tmp +# LOCALAI_IMAGE_PATH=/tmp/generated/images ## Specify a default upload limit in MB (whisper) -# UPLOAD_LIMIT +# LOCALAI_UPLOAD_LIMIT=15 ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/) -# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py +# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py ### Advanced settings ### ### Those are not really used by LocalAI, but from components in the stack ### @@ -72,18 +72,18 @@ # LLAMACPP_PARALLEL=1 ### Enable to run parallel requests -# PARALLEL_REQUESTS=true +# LOCALAI_PARALLEL_REQUESTS=true ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time -# WATCHDOG_IDLE=true -# -# Enables watchdog to kill backends that are busy for too much time -# WATCHDOG_BUSY=true +# LOCALAI_WATCHDOG_IDLE=true # # Time in duration format (e.g. 1h30m) after which a backend is considered idle -# WATCHDOG_IDLE_TIMEOUT=5m +# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m +# +# Enables watchdog to kill backends that are busy for too much time +# LOCALAI_WATCHDOG_BUSY=true # # Time in duration format (e.g. 1h30m) after which a backend is considered busy -# WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file +# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file diff --git a/core/cli/cli.go b/core/cli/cli.go new file mode 100644 index 00000000..5e757f64 --- /dev/null +++ b/core/cli/cli.go @@ -0,0 +1,20 @@ +package cli + +import "embed" + +type Context struct { + Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` + LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"` + + // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI + BackendAssets embed.FS `kong:"-"` +} + +var CLI struct { + Context `embed:""` + + Run RunCMD `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified.
Run 'local-ai run --help' for more information" default:"withargs"` + Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"` + TTS TTSCMD `cmd:"" help:"Convert text to speech"` + Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` +} diff --git a/core/cli/models.go b/core/cli/models.go new file mode 100644 index 00000000..62ef366b --- /dev/null +++ b/core/cli/models.go @@ -0,0 +1,74 @@ +package cli + +import ( + "encoding/json" + "fmt" + + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/rs/zerolog/log" + "github.com/schollz/progressbar/v3" +) + +type ModelsCMDFlags struct { + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` +} + +type ModelsList struct { + ModelsCMDFlags `embed:""` +} + +type ModelsInstall struct { + ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` + + ModelsCMDFlags `embed:""` +} + +type ModelsCMD struct { + List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"` + Install ModelsInstall `cmd:"" help:"Install a model from the gallery"` +} + +func (ml *ModelsList) Run(ctx *Context) error { + var galleries []gallery.Gallery + if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil { + log.Error().Err(err).Msg("unable to load galleries") + } + + models, err := gallery.AvailableGalleryModels(galleries, ml.ModelsPath) + if err != nil { + return err + } + for _, model := range models { + if model.Installed { + fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name) + } else { + fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name) + } + } + return nil +} + +func (mi *ModelsInstall) Run(ctx *Context) error { + modelName := mi.ModelArgs[0] + + var galleries []gallery.Gallery + if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil { + log.Error().Err(err).Msg("unable to load galleries") + } + + progressBar := progressbar.NewOptions( + 1000, + progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)), + progressbar.OptionShowBytes(false), + progressbar.OptionClearOnFinish(), + ) + progressCallback := func(fileName string, current string, total string, percentage float64) { + progressBar.Set(int(percentage * 10)) + } + err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback) + if err != nil { + return err + } + return nil +} diff --git a/core/cli/run.go b/core/cli/run.go new file mode 100644 index 00000000..09d09979 --- /dev/null +++ b/core/cli/run.go @@ -0,0 +1,155 @@ +package cli + +import ( + "fmt" + "os" + "strings" + "time" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http" + "github.com/go-skynet/LocalAI/core/startup" + "github.com/rs/zerolog/log" +) + +type RunCMD struct { + ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` + + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + ImagePath
string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` + AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"` + UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` + ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` + LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + // The alias on this option is there to preserve functionality with the old `--config-file` parameter + ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` + + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` + RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` + PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` + Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` + PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` + + F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"` + Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` + ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` + + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` + CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` + UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` + APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` + DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"` + + ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` + SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` + PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` + ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` + EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` + WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` + EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` + WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` +} + +func (r *RunCMD) Run(ctx *Context) error { + opts := []config.AppOption{ + config.WithConfigFile(r.ModelsConfigFile), + config.WithJSONStringPreload(r.PreloadModels), + config.WithYAMLConfigPreload(r.PreloadModelsConfig), + config.WithModelPath(r.ModelsPath), + config.WithContextSize(r.ContextSize), + config.WithDebug(ctx.Debug), + config.WithImageDir(r.ImagePath), + config.WithAudioDir(r.AudioPath), + config.WithUploadDir(r.UploadPath), + config.WithConfigsDir(r.ConfigPath), + config.WithF16(r.F16), + config.WithStringGalleries(r.Galleries), + config.WithModelLibraryURL(r.RemoteLibrary), + config.WithDisableMessage(false), + config.WithCors(r.CORS), + config.WithCorsAllowOrigins(r.CORSAllowOrigins), + config.WithThreads(r.Threads), + config.WithBackendAssets(ctx.BackendAssets), + config.WithBackendAssetsOutput(r.BackendAssetsPath), + config.WithUploadLimitMB(r.UploadLimit), + config.WithApiKeys(r.APIKeys), + config.WithModelsURL(append(r.Models, r.ModelArgs...)...), + } + + idleWatchDog := r.EnableWatchdogIdle + busyWatchDog := r.EnableWatchdogBusy + + if r.DisableWelcome { + opts = append(opts, config.DisableWelcomePage) + } + + if idleWatchDog || busyWatchDog { + opts = append(opts, config.EnableWatchDog) + if idleWatchDog { + opts = append(opts, config.EnableWatchDogIdleCheck) + dur, err := time.ParseDuration(r.WatchdogIdleTimeout) + if err != nil { + return err + } + opts = append(opts, config.SetWatchDogIdleTimeout(dur)) + } + if busyWatchDog { + opts = append(opts, config.EnableWatchDogBusyCheck) + dur, err := time.ParseDuration(r.WatchdogBusyTimeout) + if err != nil { + return err + } + opts = append(opts, config.SetWatchDogBusyTimeout(dur)) + } + } + if r.ParallelRequests { + opts = append(opts, config.EnableParallelBackendRequests) + } + if r.SingleActiveBackend { + opts = 
append(opts, config.EnableSingleBackend) + } + + // split ":" to get backend name and the uri + for _, v := range r.ExternalGRPCBackends { + backend := v[:strings.IndexByte(v, ':')] + uri := v[strings.IndexByte(v, ':')+1:] + opts = append(opts, config.WithExternalBackend(backend, uri)) + } + + if r.AutoloadGalleries { + opts = append(opts, config.EnableGalleriesAutoload) + } + + if r.PreloadBackendOnly { + _, _, _, err := startup.Startup(opts...) + return err + } + + cl, ml, options, err := startup.Startup(opts...) + + if err != nil { + return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) + } + + // Watch the configuration directory + // If the directory does not exist, we don't watch it + if _, err := os.Stat(r.LocalaiConfigDir); err == nil { + closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) + defer closeConfigWatcherFn() + + if err != nil { + return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir) + } + } + + appHTTP, err := http.App(cl, ml, options) + if err != nil { + log.Error().Err(err).Msg("error during HTTP App construction") + return err + } + + return appHTTP.Listen(r.Address) +} diff --git a/core/cli/transcript.go b/core/cli/transcript.go new file mode 100644 index 00000000..9f36a77c --- /dev/null +++ b/core/cli/transcript.go @@ -0,0 +1,54 @@ +package cli + +import ( + "context" + "errors" + "fmt" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type TranscriptCMD struct { + Filename string `arg:""` + + Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"` + Model string `short:"m" required:"" help:"Model name to run the transcription"` + Language string `short:"l" help:"Language of the audio file"` + Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` +} + +func (t *TranscriptCMD) Run(ctx *Context) error { + opts := &config.ApplicationConfig{ + ModelPath: t.ModelsPath, + Context: context.Background(), + AssetsDestination: t.BackendAssetsPath, + } + + cl := config.NewBackendConfigLoader() + ml := model.NewModelLoader(opts.ModelPath) + if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil { + return err + } + + c, exists := cl.GetBackendConfig(t.Model) + if !exists { + return errors.New("model not found") + } + + c.Threads = &t.Threads + + defer ml.StopAllGRPC() + + tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) + if err != nil { + return err + } + for _, segment := range tr.Segments { + fmt.Println(segment.Start.String(), "-", segment.Text) + } + return nil +} diff --git a/core/cli/tts.go b/core/cli/tts.go new file mode 100644 index 00000000..1d8fd3a3 --- /dev/null +++ b/core/cli/tts.go @@ -0,0 +1,61 @@ +package cli + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type TTSCMD struct { + Text []string `arg:""` + + Backend string
`short:"b" default:"piper" help:"Backend to run the TTS model"` + Model string `short:"m" required:"" help:"Model name to run the TTS"` + Voice string `short:"v" help:"Voice name to run the TTS"` + OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` +} + +func (t *TTSCMD) Run(ctx *Context) error { + outputFile := t.OutputFile + outputDir := t.BackendAssetsPath + if outputFile != "" { + outputDir = filepath.Dir(outputFile) + } + + text := strings.Join(t.Text, " ") + + opts := &config.ApplicationConfig{ + ModelPath: t.ModelsPath, + Context: context.Background(), + AudioDir: outputDir, + AssetsDestination: t.BackendAssetsPath, + } + ml := model.NewModelLoader(opts.ModelPath) + + defer ml.StopAllGRPC() + + options := config.BackendConfig{} + options.SetDefaults() + + filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) + if err != nil { + return err + } + if outputFile != "" { + if err := os.Rename(filePath, outputFile); err != nil { + return err + } + fmt.Printf("Generate file %s\n", outputFile) + } else { + fmt.Printf("Generate file %s\n", filePath) + } + return nil +} diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index c9926bab..dace5803 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -382,35 +382,84 @@ docker run --env-file .env localai ### CLI parameters -You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. +You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. Any command line parameter can be specified via an environment variable. +#### Global Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| -h, --help | | Show context-sensitive help. | +| --log-level | info | Set the level of logs to output [error,warn,info,debug] | $LOCALAI_LOG_LEVEL | -| Parameter | Environmental Variable | Default Variable | Description | -| ------------------------------ | ------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------- | -| --f16 | $F16 | false | Enable f16 mode | -| --debug | $DEBUG | false | Enable debug mode | -| --cors | $CORS | false | Enable CORS support | -| --cors-allow-origins value | $CORS_ALLOW_ORIGINS | | Specify origins allowed for CORS | -| --threads value | $THREADS | 4 | Number of threads to use for parallel computation | -| --models-path value | $MODELS_PATH | ./models | Path to the directory containing models used for inferencing | -| --preload-models value | $PRELOAD_MODELS | | List of models to preload in JSON format at startup | -| --preload-models-config value | $PRELOAD_MODELS_CONFIG | | A config with a list of models to apply at startup. 
Specify the path to a YAML config file | -| --config-file value | $CONFIG_FILE | | Path to the config file | -| --address value | $ADDRESS | :8080 | Specify the bind address for the API server | -| --image-path value | $IMAGE_PATH | | Path to the directory used to store generated images | -| --context-size value | $CONTEXT_SIZE | 512 | Default context size of the model | -| --upload-limit value | $UPLOAD_LIMIT | 15 | Default upload limit in megabytes (audio file upload) | -| --galleries | $GALLERIES | | Allows to set galleries from command line | -|--parallel-requests | $PARALLEL_REQUESTS | false | Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm | -| --single-active-backend | $SINGLE_ACTIVE_BACKEND | false | Allow only one backend to be running | -| --api-keys value | $API_KEY | empty | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys. -| --enable-watchdog-idle | $WATCHDOG_IDLE | false | Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long. (default: false) [$WATCHDOG_IDLE] -| --enable-watchdog-busy | $WATCHDOG_BUSY | false | Enable watchdog for stopping busy backends that exceed a defined threshold.| -| --watchdog-busy-timeout value | $WATCHDOG_BUSY_TIMEOUT | 5m | Watchdog timeout. This will restart the backend if it crashes. | -| --watchdog-idle-timeout value | $WATCHDOG_IDLE_TIMEOUT | 15m | Watchdog idle timeout. This will restart the backend if it crashes. | -| --preload-backend-only | $PRELOAD_BACKEND_ONLY | false | If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups. | -| --external-grpc-backends | EXTERNAL_GRPC_BACKENDS | none | Comma separated list of external gRPC backends to use. Format: `name:host:port` or `name:/path/to/file` | +#### Storage Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | +| --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH | +| --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH | +| --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. 
piper) | $LOCALAI_AUDIO_PATH | +| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | +| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | +| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | +#### Models Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --galleries | STRING | JSON list of galleries | $LOCALAI_GALLERIES | +| --autoload-galleries | | | $LOCALAI_AUTOLOAD_GALLERIES | +| --remote-library | "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" | A LocalAI remote library URL | $LOCALAI_REMOTE_LIBRARY | +| --preload-models | STRING | A List of models to apply in JSON at start |$LOCALAI_PRELOAD_MODELS | +| --models | MODELS,... | A List of model configuration URLs to load | $LOCALAI_MODELS | +| --preload-models-config | STRING | A List of models to apply at startup. Path to a YAML config file | $LOCALAI_PRELOAD_MODELS_CONFIG | + +#### Performance Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --f16 | | Enable GPU acceleration | $LOCALAI_F16 | +| -t, --threads | 4 | Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested | $LOCALAI_THREADS | +| --context-size | 512 | Default context size for models | $LOCALAI_CONTEXT_SIZE | + +#### API Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --address | ":8080" | Bind address for the API server | $LOCALAI_ADDRESS | +| --cors | | | $LOCALAI_CORS | +| --cors-allow-origins | | | $LOCALAI_CORS_ALLOW_ORIGINS | +| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT | +| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY | +| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME | + +#### Backend Flags +| Parameter | Default | Description | Environment Variable | +|-----------|---------|-------------|----------------------| +| --parallel-requests | | Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm) | $LOCALAI_PARALLEL_REQUESTS | +| --single-active-backend | | Allow only one backend to be run at a time | $LOCALAI_SINGLE_ACTIVE_BACKEND | +| --preload-backend-only | | Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups) | $LOCALAI_PRELOAD_BACKEND_ONLY | +| --external-grpc-backends | EXTERNAL-GRPC-BACKENDS,... 
| A list of external grpc backends | $LOCALAI_EXTERNAL_GRPC_BACKENDS | +| --enable-watchdog-idle | | Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout | $LOCALAI_WATCHDOG_IDLE | +| --watchdog-idle-timeout | 15m | Threshold beyond which an idle backend should be stopped | $LOCALAI_WATCHDOG_IDLE_TIMEOUT, $WATCHDOG_IDLE_TIMEOUT | +| --enable-watchdog-busy | | Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout | $LOCALAI_WATCHDOG_BUSY | +| --watchdog-busy-timeout | 5m | Threshold beyond which a busy backend should be stopped | $LOCALAI_WATCHDOG_BUSY_TIMEOUT | + +### .env files + +Any settings being provided by an Environment Variable can also be provided from within .env files. There are several locations that will be checked for relevant .env files. In order of precedence they are: + +- .env within the current directory +- localai.env within the current directory +- localai.env within the home directory +- .config/localai.env within the home directory +- /etc/localai.env + +Environment variables within files earlier in the list will take precedence over environment variables defined in files later in the list. + +An example .env file is: + +``` +LOCALAI_THREADS=10 +LOCALAI_MODELS_PATH=/mnt/storage/localai/models +LOCALAI_F16=true +``` ### Extra backends diff --git a/go.mod b/go.mod index 4dd207c7..fac7acfd 100644 --- a/go.mod +++ b/go.mod @@ -13,8 +13,8 @@ require ( github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 github.com/gofiber/fiber/v2 v2.52.0 + github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 - github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 github.com/google/uuid v1.5.0 github.com/hashicorp/go-multierror v1.1.1 github.com/hpcloud/tail v1.0.0 @@ -30,11 +30,12 @@ require ( github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 github.com/rs/zerolog v1.31.0 + github.com/russross/blackfriday v1.6.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 github.com/stretchr/testify v1.9.0 + github.com/swaggo/swag v1.16.3 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 - github.com/urfave/cli/v2 v2.27.1 github.com/valyala/fasthttp v1.51.0 go.opentelemetry.io/otel v1.19.0 go.opentelemetry.io/otel/exporters/prometheus v0.42.0 @@ -64,8 +65,6 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect - github.com/PuerkitoBio/purell v1.2.1 // indirect - github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/aymanbagabas/go-osc52 v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -85,7 +84,6 @@ require ( github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect - github.com/gofiber/swagger v1.0.0 // indirect github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -119,12 +117,10 @@ require ( github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect - github.com/russross/blackfriday 
v1.6.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect github.com/swaggo/files/v2 v2.0.0 // indirect - github.com/swaggo/swag v1.16.3 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect @@ -140,12 +136,11 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) require ( + github.com/alecthomas/kong v0.9.0 github.com/andybalholm/brotli v1.0.5 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/go-audio/audio v1.0.0 // indirect github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect @@ -153,16 +148,15 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/joho/godotenv v1.5.1 github.com/klauspost/compress v1.17.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 github.com/rivo/uniseg v0.2.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect - github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect golang.org/x/net v0.22.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index f81f10c8..dc08c465 100644 --- a/go.sum +++ b/go.sum @@ -13,12 +13,14 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= -github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28= -github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= +github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= +github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= +github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/brotli v1.0.1/go.mod 
h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -45,10 +47,6 @@ github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1A github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= @@ -95,8 +93,6 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= -github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= -github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -104,8 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw= -github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw= github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -131,8 +125,6 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47 h1:k4Tw0nt6lwro3Uin8eqoET7MDA4JnT8YgbCjc/g5E3k= -github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47/go.mod 
h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -147,8 +139,6 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -157,6 +147,8 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= @@ -165,6 +157,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= @@ -172,8 +166,6 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM= github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.0/go.mod 
h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= @@ -198,7 +190,6 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= @@ -296,8 +287,6 @@ github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWR github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= @@ -328,7 +317,6 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= @@ -349,14 +337,8 @@ github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= -github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= -github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M= -github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= github.com/valyala/fasthttp v1.51.0/go.mod 
h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= @@ -371,10 +353,6 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17 github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= -github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw= -github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -401,15 +379,11 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= -golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -425,8 +399,6 @@ golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= -golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -435,9 +407,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -471,16 +442,12 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= -golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -488,8 +455,6 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -499,8 +464,6 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 
-golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= -golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -541,5 +504,3 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo= gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/main.go b/main.go index 53966ba5..8b5696d1 100644 --- a/main.go +++ b/main.go @@ -1,41 +1,30 @@ package main import ( - "context" - "encoding/json" - "errors" - "fmt" "os" "os/signal" "path/filepath" - "strings" "syscall" - "time" - - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - - "github.com/go-skynet/LocalAI/core/http" - "github.com/go-skynet/LocalAI/core/startup" + "github.com/alecthomas/kong" + "github.com/go-skynet/LocalAI/core/cli" "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/pkg/gallery" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/joho/godotenv" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" - progressbar "github.com/schollz/progressbar/v3" - "github.com/urfave/cli/v2" _ "github.com/go-skynet/LocalAI/swagger" ) -const ( - remoteLibraryURL = "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" -) - func main() { + var err error + + // Initialize zerolog at a level of INFO; we will set the desired level after we parse the CLI options log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) - // clean up process + zerolog.SetGlobalLevel(zerolog.InfoLevel) + + // Catch signals from the OS requesting us to exit go func() { c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked signal.Notify(c, os.Interrupt, syscall.SIGTERM) @@ -43,511 +32,83 @@ func main() { os.Exit(1) }() - path, err := os.Getwd() - if err != nil { - log.Error().Err(err).Msg("failed to get current directory") - os.Exit(1) + // handle loading environment variables from .env files + envFiles := []string{".env", "localai.env"} + homeDir, err := os.UserHomeDir() + if err == nil { + envFiles = append(envFiles, filepath.Join(homeDir, "localai.env"), filepath.Join(homeDir, ".config/localai.env")) + } + envFiles = append(envFiles, "/etc/localai.env") + + for _, envFile := range envFiles { + if _, err := os.Stat(envFile); err == nil { + log.Info().Str("envFile", envFile).Msg("loading environment variables from file") + godotenv.Load(envFile) + } } - app := &cli.App{ - Name: "LocalAI", - Version: internal.PrintableVersion(), - Usage: "OpenAI, OSS alternative. Drop-in compatible API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware. 
Supported server endpoints: OpenAI, Elevenlabs", - Flags: []cli.Flag{ - &cli.BoolFlag{ - Name: "f16", - EnvVars: []string{"F16"}, - }, - &cli.BoolFlag{ - Name: "autoload-galleries", - EnvVars: []string{"AUTOLOAD_GALLERIES"}, - }, - &cli.BoolFlag{ - Name: "debug", - EnvVars: []string{"DEBUG"}, - }, - &cli.BoolFlag{ - Name: "single-active-backend", - EnvVars: []string{"SINGLE_ACTIVE_BACKEND"}, - Usage: "Allow only one backend to be running.", - }, - &cli.BoolFlag{ - Name: "parallel-requests", - EnvVars: []string{"PARALLEL_REQUESTS"}, - Usage: "Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm", - }, - &cli.BoolFlag{ - Name: "cors", - EnvVars: []string{"CORS"}, - }, - &cli.StringFlag{ - Name: "cors-allow-origins", - EnvVars: []string{"CORS_ALLOW_ORIGINS"}, - }, - &cli.IntFlag{ - Name: "threads", - Usage: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", - EnvVars: []string{"THREADS"}, - Value: 4, - }, - &cli.StringFlag{ - Name: "models-path", - Usage: "Path containing models used for inferencing", - EnvVars: []string{"MODELS_PATH"}, - Value: filepath.Join(path, "models"), - }, - &cli.StringFlag{ - Name: "galleries", - Usage: "JSON list of galleries", - EnvVars: []string{"GALLERIES"}, - }, - &cli.StringFlag{ - Name: "remote-library", - Usage: "A LocalAI remote library URL", - EnvVars: []string{"REMOTE_LIBRARY"}, - Value: remoteLibraryURL, - }, - &cli.StringFlag{ - Name: "preload-models", - Usage: "A List of models to apply in JSON at start", - EnvVars: []string{"PRELOAD_MODELS"}, - }, - &cli.StringSliceFlag{ - Name: "models", - Usage: "A List of models URLs configurations.", - EnvVars: []string{"MODELS"}, - }, - &cli.StringFlag{ - Name: "preload-models-config", - Usage: "A List of models to apply at startup. Path to a YAML config file", - EnvVars: []string{"PRELOAD_MODELS_CONFIG"}, - }, - &cli.StringFlag{ - Name: "config-file", - Usage: "Config file", - EnvVars: []string{"CONFIG_FILE"}, - }, - &cli.StringFlag{ - Name: "address", - Usage: "Bind address for the API server.", - EnvVars: []string{"ADDRESS"}, - Value: ":8080", - }, - &cli.StringFlag{ - Name: "image-path", - Usage: "Image directory", - EnvVars: []string{"IMAGE_PATH"}, - Value: "/tmp/generated/images", - }, - &cli.StringFlag{ - Name: "audio-path", - Usage: "audio directory", - EnvVars: []string{"AUDIO_PATH"}, - Value: "/tmp/generated/audio", - }, - &cli.StringFlag{ - Name: "upload-path", - Usage: "Path to store uploads from files api", - EnvVars: []string{"UPLOAD_PATH"}, - Value: "/tmp/localai/upload", - }, - &cli.StringFlag{ - Name: "config-path", - Usage: "Path to store uploads from files api", - EnvVars: []string{"CONFIG_PATH"}, - Value: "/tmp/localai/config", - }, - &cli.StringFlag{ - Name: "backend-assets-path", - Usage: "Path used to extract libraries that are required by some of the backends in runtime.", - EnvVars: []string{"BACKEND_ASSETS_PATH"}, - Value: "/tmp/localai/backend_data", - }, - &cli.StringSliceFlag{ - Name: "external-grpc-backends", - Usage: "A list of external grpc backends", - EnvVars: []string{"EXTERNAL_GRPC_BACKENDS"}, - }, - &cli.IntFlag{ - Name: "context-size", - Usage: "Default context size of the model", - EnvVars: []string{"CONTEXT_SIZE"}, - Value: 512, - }, - &cli.IntFlag{ - Name: "upload-limit", - Usage: "Default upload-limit. 
MB", - EnvVars: []string{"UPLOAD_LIMIT"}, - Value: 15, - }, - &cli.StringSliceFlag{ - Name: "api-keys", - Usage: "List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.", - EnvVars: []string{"API_KEY"}, - }, - &cli.BoolFlag{ - Name: "enable-watchdog-idle", - Usage: "Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long.", - EnvVars: []string{"WATCHDOG_IDLE"}, - Value: false, - }, - &cli.BoolFlag{ - Name: "disable-welcome", - Usage: "Disable welcome pages", - EnvVars: []string{"DISABLE_WELCOME"}, - Value: false, - }, - &cli.BoolFlag{ - Name: "enable-watchdog-busy", - Usage: "Enable watchdog for stopping busy backends that exceed a defined threshold.", - EnvVars: []string{"WATCHDOG_BUSY"}, - Value: false, - }, - &cli.StringFlag{ - Name: "watchdog-busy-timeout", - Usage: "Watchdog timeout. This will restart the backend if it crashes.", - EnvVars: []string{"WATCHDOG_BUSY_TIMEOUT"}, - Value: "5m", - }, - &cli.StringFlag{ - Name: "watchdog-idle-timeout", - Usage: "Watchdog idle timeout. This will restart the backend if it crashes.", - EnvVars: []string{"WATCHDOG_IDLE_TIMEOUT"}, - Value: "15m", - }, - &cli.BoolFlag{ - Name: "preload-backend-only", - Usage: "If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups.", - EnvVars: []string{"PRELOAD_BACKEND_ONLY"}, - Value: false, - }, - &cli.StringFlag{ - Name: "localai-config-dir", - Usage: "Directory to use for the configuration files of LocalAI itself. This is NOT where model files should be placed.", - EnvVars: []string{"LOCALAI_CONFIG_DIR"}, - Value: "./configuration", - }, - }, - Description: ` -LocalAI is a drop-in replacement OpenAI API which runs inference locally. + // Actually parse the CLI options + ctx := kong.Parse(&cli.CLI, + kong.Description( + ` LocalAI is a drop-in replacement OpenAI API for running LLM, GPT and genAI models locally on CPU, GPUs with consumer grade hardware. 
Some of the models compatible are: -- Vicuna -- Koala -- GPT4ALL -- GPT4ALL-J -- Cerebras -- Alpaca -- StableLM (ggml quantized) + - Vicuna + - Koala + - GPT4ALL + - GPT4ALL-J + - Cerebras + - Alpaca + - StableLM (ggml quantized) -For a list of compatible model, check out: https://localai.io/model-compatibility/index.html +For a list of compatible models, check out: https://localai.io/model-compatibility/index.html + +Copyright: Ettore Di Giacinto + +Version: ${version} `, - UsageText: `local-ai [options]`, - Copyright: "Ettore Di Giacinto", - Action: func(ctx *cli.Context) error { - opts := []config.AppOption{ - config.WithConfigFile(ctx.String("config-file")), - config.WithJSONStringPreload(ctx.String("preload-models")), - config.WithYAMLConfigPreload(ctx.String("preload-models-config")), - config.WithModelPath(ctx.String("models-path")), - config.WithContextSize(ctx.Int("context-size")), - config.WithDebug(ctx.Bool("debug")), - config.WithImageDir(ctx.String("image-path")), - config.WithAudioDir(ctx.String("audio-path")), - config.WithUploadDir(ctx.String("upload-path")), - config.WithConfigsDir(ctx.String("config-path")), - config.WithF16(ctx.Bool("f16")), - config.WithStringGalleries(ctx.String("galleries")), - config.WithModelLibraryURL(ctx.String("remote-library")), - config.WithDisableMessage(false), - config.WithCors(ctx.Bool("cors")), - config.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), - config.WithThreads(ctx.Int("threads")), - config.WithBackendAssets(backendAssets), - config.WithBackendAssetsOutput(ctx.String("backend-assets-path")), - config.WithUploadLimitMB(ctx.Int("upload-limit")), - config.WithApiKeys(ctx.StringSlice("api-keys")), - config.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...), - } - - idleWatchDog := ctx.Bool("enable-watchdog-idle") - busyWatchDog := ctx.Bool("enable-watchdog-busy") - - if ctx.Bool("disable-welcome") { - opts = append(opts, config.DisableWelcomePage) - } - - if idleWatchDog || busyWatchDog { - opts = append(opts, config.EnableWatchDog) - if idleWatchDog { - opts = append(opts, config.EnableWatchDogIdleCheck) - dur, err := time.ParseDuration(ctx.String("watchdog-idle-timeout")) - if err != nil { - return err - } - opts = append(opts, config.SetWatchDogIdleTimeout(dur)) - } - if busyWatchDog { - opts = append(opts, config.EnableWatchDogBusyCheck) - dur, err := time.ParseDuration(ctx.String("watchdog-busy-timeout")) - if err != nil { - return err - } - opts = append(opts, config.SetWatchDogBusyTimeout(dur)) - } - } - if ctx.Bool("parallel-requests") { - opts = append(opts, config.EnableParallelBackendRequests) - } - if ctx.Bool("single-active-backend") { - opts = append(opts, config.EnableSingleBackend) - } - - externalgRPC := ctx.StringSlice("external-grpc-backends") - // split ":" to get backend name and the uri - for _, v := range externalgRPC { - backend := v[:strings.IndexByte(v, ':')] - uri := v[strings.IndexByte(v, ':')+1:] - opts = append(opts, config.WithExternalBackend(backend, uri)) - } - - if ctx.Bool("autoload-galleries") { - opts = append(opts, config.EnableGalleriesAutoload) - } - - if ctx.Bool("preload-backend-only") { - _, _, _, err := startup.Startup(opts...) - return err - } - - cl, ml, options, err := startup.Startup(opts...) 
- - if err != nil { - return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) - } - - configdir := ctx.String("localai-config-dir") - // Watch the configuration directory - // If the directory does not exist, we don't watch it - if _, err := os.Stat(configdir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options) - defer closeConfigWatcherFn() - - if err != nil { - return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir")) - } - } - - appHTTP, err := http.App(cl, ml, options) - if err != nil { - log.Error().Err(err).Msg("error during HTTP App construction") - return err - } - - return appHTTP.Listen(ctx.String("address")) + ), + kong.UsageOnError(), + kong.Vars{ + "basepath": kong.ExpandPath("."), + "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", + "version": internal.PrintableVersion(), }, - Commands: []*cli.Command{ - { - Name: "models", - Usage: "List or install models", - Subcommands: []*cli.Command{ - { - Name: "list", - Usage: "List the models avaiable in your galleries", - Action: func(ctx *cli.Context) error { - var galleries []gallery.Gallery - if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Err(err).Msg("unable to load galleries") - } + ) - models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path")) - if err != nil { - return err - } - for _, model := range models { - if model.Installed { - fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name) - } else { - fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name) - } - } - return nil - }, - }, - { - Name: "install", - Usage: "Install a model from the gallery", - Action: func(ctx *cli.Context) error { - modelName := ctx.Args().First() - - var galleries []gallery.Gallery - if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil { - log.Error().Err(err).Msg("unable to load galleries") - } - - progressBar := progressbar.NewOptions( - 1000, - progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)), - progressbar.OptionShowBytes(false), - progressbar.OptionClearOnFinish(), - ) - progressCallback := func(fileName string, current string, total string, percentage float64) { - progressBar.Set(int(percentage * 10)) - } - err = gallery.InstallModelFromGallery(galleries, modelName, ctx.String("models-path"), gallery.GalleryModel{}, progressCallback) - if err != nil { - return err - } - return nil - }, - }, - }, - }, - { - Name: "tts", - Usage: "Convert text to speech", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "backend", - Value: "piper", - Aliases: []string{"b"}, - Usage: "Backend to run the TTS model", - }, - &cli.StringFlag{ - Name: "model", - Aliases: []string{"m"}, - Usage: "Model name to run the TTS", - Required: true, - }, - &cli.StringFlag{ - Name: "voice", - Aliases: []string{"v"}, - Usage: "Voice name to run the TTS (optional)", - Required: true, - }, - &cli.StringFlag{ - Name: "output-file", - Aliases: []string{"o"}, - Usage: "The path to write the output wav file", - }, - }, - Action: func(ctx *cli.Context) error { - modelOption := ctx.String("model") - if modelOption == "" { - return errors.New("--model parameter is required") - } - backendOption := ctx.String("backend") - if backendOption == "" { - backendOption = "piper" - } - outputFile := ctx.String("output-file") - outputDir := 
ctx.String("backend-assets-path") - if outputFile != "" { - outputDir = filepath.Dir(outputFile) - } - - text := strings.Join(ctx.Args().Slice(), " ") - - opts := &config.ApplicationConfig{ - ModelPath: ctx.String("models-path"), - Context: context.Background(), - AudioDir: outputDir, - AssetsDestination: ctx.String("backend-assets-path"), - } - ml := model.NewModelLoader(opts.ModelPath) - - defer ml.StopAllGRPC() - - filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, ctx.String("voice"), ml, opts, config.BackendConfig{}) - if err != nil { - return err - } - if outputFile != "" { - if err := os.Rename(filePath, outputFile); err != nil { - return err - } - fmt.Printf("Generate file %s\n", outputFile) - } else { - fmt.Printf("Generate file %s\n", filePath) - } - return nil - }, - }, - { - Name: "transcript", - Usage: "Convert audio to text", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "backend", - Value: "whisper", - Aliases: []string{"b"}, - Usage: "Backend to run the transcription model", - }, - &cli.StringFlag{ - Name: "model", - Aliases: []string{"m"}, - Usage: "Model name to run the transcription", - }, - &cli.StringFlag{ - Name: "language", - Aliases: []string{"l"}, - Usage: "Language of the audio file", - }, - &cli.IntFlag{ - Name: "threads", - Aliases: []string{"t"}, - Usage: "Threads to use", - Value: 1, - }, - &cli.StringFlag{ - Name: "output-file", - Aliases: []string{"o"}, - Usage: "The path to write the output wav file", - }, - }, - Action: func(ctx *cli.Context) error { - modelOption := ctx.String("model") - filename := ctx.Args().First() - language := ctx.String("language") - threads := ctx.Int("threads") - - opts := &config.ApplicationConfig{ - ModelPath: ctx.String("models-path"), - Context: context.Background(), - AssetsDestination: ctx.String("backend-assets-path"), - } - - cl := config.NewBackendConfigLoader() - ml := model.NewModelLoader(opts.ModelPath) - if err := cl.LoadBackendConfigsFromPath(ctx.String("models-path")); err != nil { - return err - } - - c, exists := cl.GetBackendConfig(modelOption) - if !exists { - return errors.New("model not found") - } - - c.Threads = &threads - - defer ml.StopAllGRPC() - - tr, err := backend.ModelTranscription(filename, language, ml, c, opts) - if err != nil { - return err - } - for _, segment := range tr.Segments { - fmt.Println(segment.Start.String(), "-", segment.Text) - } - return nil - }, - }, - }, + // Configure the logging level before we run the application + // This is here to preserve the existing --debug flag functionality + logLevel := "info" + if cli.CLI.Debug && cli.CLI.LogLevel == nil { + logLevel = "debug" + zerolog.SetGlobalLevel(zerolog.DebugLevel) + cli.CLI.LogLevel = &logLevel } - err = app.Run(os.Args) - if err != nil { - log.Error().Err(err).Msg("application runtime error") - os.Exit(1) + if cli.CLI.LogLevel == nil { + cli.CLI.LogLevel = &logLevel } + + switch *cli.CLI.LogLevel { + case "error": + log.Info().Msg("Setting logging to error") + zerolog.SetGlobalLevel(zerolog.ErrorLevel) + case "warn": + log.Info().Msg("Setting logging to warn") + zerolog.SetGlobalLevel(zerolog.WarnLevel) + case "info": + log.Info().Msg("Setting logging to info") + zerolog.SetGlobalLevel(zerolog.InfoLevel) + case "debug": + log.Info().Msg("Setting logging to debug") + zerolog.SetGlobalLevel(zerolog.DebugLevel) + } + + // Populate the application with the embedded backend assets + cli.CLI.Context.BackendAssets = backendAssets + + // Run the thing! 
+ err = ctx.Run(&cli.CLI.Context) + + ctx.FatalIfErrorf(err) } From 0e44a4e66442327fa4bf340322f2f85ca7308500 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:19:46 +0200 Subject: [PATCH 0011/2648] :arrow_up: Update docs version mudler/LocalAI (#1988) Signed-off-by: GitHub Co-authored-by: mudler --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index cc0478ca..1b6a2161 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.1" + "version": "v2.12.3" } From e152b07b74cda26f2513fb85755c6b860e7ca65f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 11 Apr 2024 09:22:07 +0200 Subject: [PATCH 0012/2648] :arrow_up: Update ggerganov/llama.cpp (#1991) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 337ebc64..e2e4f211 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1b67731e184e27a465b8c5476061294a4af668ea +CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b4548ad72dc31a00a2a819c3bf540012bf11432a Mon Sep 17 00:00:00 2001 From: Ludovic Leroux Date: Thu, 11 Apr 2024 03:44:39 -0400 Subject: [PATCH 0013/2648] feat: add flash-attn in nvidia and rocm envs (#1995) Signed-off-by: Ludovic LEROUX --- .../python/common-env/transformers/install.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 8502adde..30ec0de0 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -2,6 +2,7 @@ set -ex SKIP_CONDA=${SKIP_CONDA:-0} +REQUIREMENTS_FILE=$1 # Check if environment exist conda_env_exists(){ @@ -14,7 +15,7 @@ else export PATH=$PATH:/opt/conda/bin if conda_env_exists "transformers" ; then echo "Creating virtual environment..." - conda env create --name transformers --file $1 + conda env create --name transformers --file $REQUIREMENTS_FILE echo "Virtual environment created." else echo "Virtual environment already exists." 
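For context: install.sh now receives the conda environment file as its first argument ($1), and the hunk below keys off that file name to decide whether FlashAttention is installed. A minimal sketch of an invocation, assuming the script is run from backend/python/common-env/transformers and that an environment file with the -nvidia.yml suffix exists there (the file names are illustrative):

    # name matches -nvidia.yml, so flash-attn is installed on top of the env
    bash install.sh transformers-nvidia.yml

    # any other name (CPU, and ROCm for now, per the TODO below) skips that step
    bash install.sh transformers.yml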
@@ -28,11 +29,16 @@ if [ -d "/opt/intel" ]; then pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] fi -if [ "$PIP_CACHE_PURGE" = true ] ; then - if [ $SKIP_CONDA -eq 0 ]; then - # Activate conda environment - source activate transformers - fi +# If we didn't skip conda, activate the environment +# to install FlashAttention +if [ $SKIP_CONDA -eq 0 ]; then + source activate transformers +fi +if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then + #TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily + pip install flash-attn --no-build-isolation +fi +if [ "$PIP_CACHE_PURGE" = true ] ; then pip cache purge fi \ No newline at end of file From c74dec7e387160fa4ab1fcebed94d8d9197fa1c4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 11:47:54 +0200 Subject: [PATCH 0014/2648] Add dependabot.yml Signed-off-by: Ettore Di Giacinto --- .github/dependabot.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..52abf1db --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" + # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" + - package-ecosystem: "pip" + # Python requirements files at the repository root. + directory: "/" + schedule: + # Check for updates to pip dependencies every week + interval: "weekly" + - package-ecosystem: "docker" + # Dockerfile at the repository root. 
+ directory: "/" + schedule: + # Check for updates to the Dockerfile every week + interval: "weekly" From 182fef339d801744c39420e10d27e98ee9f965e5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 12:13:06 +0200 Subject: [PATCH 0015/2648] Create dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 44 +++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/dependabot_auto.yml diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml new file mode 100644 index 00000000..12541d05 --- /dev/null +++ b/.github/workflows/dependabot_auto.yml @@ -0,0 +1,44 @@ +name: Dependabot auto-merge +on: + pull_request_target: + types: [review_requested] + +permissions: + contents: write + pull-requests: write + packages: read + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1.3.4 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + skip-commit-verification: true + + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Approve a PR if not already approved + run: | + gh pr checkout "$PR_URL" + if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ]; + then + gh pr review --approve "$PR_URL" + else + echo "PR already approved."; + fi + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + + - name: Enable auto-merge for Dependabot PRs + if: ${{ contains(github.event.pull_request.title, 'bump')}} + run: gh pr merge --auto --merge "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}} From a49248d29f637c424a29aea28e4ef947cda99b9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:07:45 +0000 Subject: [PATCH 0016/2648] build(deps): bump google.golang.org/protobuf from 1.31.0 to 1.33.0 (#1998) Bumps google.golang.org/protobuf from 1.31.0 to 1.33.0. --- updated-dependencies: - dependency-name: google.golang.org/protobuf dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index fac7acfd..d065436c 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.31.0 + google.golang.org/protobuf v1.33.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index dc08c465..8b3a8cc4 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= +github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= +github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= @@ -64,6 +66,8 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4= +github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= @@ -72,6 +76,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= @@ -93,6 +99,10 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= 
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y= +github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc= +github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY= +github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -217,12 +227,18 @@ github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdx github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= +github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= +github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= @@ -483,8 +499,8 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 
h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 079597548650a9665baa82b89e2eeafb66debcd6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:44:34 +0000 Subject: [PATCH 0017/2648] build(deps): bump github.com/docker/docker from 20.10.7+incompatible to 24.0.9+incompatible (#1999) build(deps): bump github.com/docker/docker Bumps [github.com/docker/docker](https://github.com/docker/docker) from 20.10.7+incompatible to 24.0.9+incompatible. - [Release notes](https://github.com/docker/docker/releases) - [Commits](https://github.com/docker/docker/compare/v20.10.7...v24.0.9) --- updated-dependencies: - dependency-name: github.com/docker/docker dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d065436c..e60c7672 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect github.com/docker/cli v20.10.17+incompatible // indirect - github.com/docker/docker v20.10.7+incompatible // indirect + github.com/docker/docker v24.0.9+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect diff --git a/go.sum b/go.sum index 8b3a8cc4..2141db5f 100644 --- a/go.sum +++ b/go.sum @@ -60,8 +60,8 @@ github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0 github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ= -github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v24.0.9+incompatible h1:HPGzNmwfLZWdxHqK9/II92pyi1EpYKsAqcl4G0Of9v0= +github.com/docker/docker v24.0.9+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= From fdfd868953a9e40d4e1b9a2eb6d428645572311d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:21:52 +0000 Subject: [PATCH 0018/2648] build(deps): bump github.com/gofiber/fiber/v2 from 2.52.0 to 2.52.1 (#2001) Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.1. 
- [Release notes](https://github.com/gofiber/fiber/releases) - [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.1) --- updated-dependencies: - dependency-name: github.com/gofiber/fiber/v2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e60c7672..238e150f 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 - github.com/gofiber/fiber/v2 v2.52.0 + github.com/gofiber/fiber/v2 v2.52.1 github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 github.com/google/uuid v1.5.0 diff --git a/go.sum b/go.sum index 2141db5f..c66e9b7c 100644 --- a/go.sum +++ b/go.sum @@ -110,8 +110,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= -github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI= +github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg= github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc= From 40781ac013e4fd2574f1faef0bf5a0d491317a34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:48:30 +0000 Subject: [PATCH 0019/2648] build(deps): bump actions/checkout from 3 to 4 (#2002) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/secscan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index a5221b40..14958070 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -14,7 +14,7 @@ jobs: GO111MODULE: on steps: - name: Checkout Source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Run Gosec Security Scanner uses: securego/gosec@master with: From 11a0418510aa2fba956251ec09dee442e906fcb2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:10:32 +0000 Subject: [PATCH 0020/2648] build(deps): bump actions/setup-go from 4 to 5 (#2003) Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4 to 5. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- .github/workflows/test.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 1d749189..269a10c5 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false @@ -100,7 +100,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false @@ -138,7 +138,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21.x' cache: false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 95d10862..28a221bb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,7 +60,7 @@ jobs: with: submodules: true - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false @@ -177,7 +177,7 @@ jobs: with: submodules: true - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} cache: false From 821cf0e3fd80a14688a4ebb432d0b9e6cb8a3d31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:58:04 +0000 Subject: [PATCH 0021/2648] build(deps): bump peter-evans/create-pull-request from 5 to 6 (#2005) Bumps [peter-evans/create-pull-request](https://github.com/peter-evans/create-pull-request) from 5 to 6. - [Release notes](https://github.com/peter-evans/create-pull-request/releases) - [Commits](https://github.com/peter-evans/create-pull-request/compare/v5...v6) --- updated-dependencies: - dependency-name: peter-evans/create-pull-request dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/bump_deps.yaml | 2 +- .github/workflows/bump_docs.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 756398d1..2abb2cab 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -49,7 +49,7 @@ jobs: run: | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} - name: Create Pull Request - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI diff --git a/.github/workflows/bump_docs.yaml b/.github/workflows/bump_docs.yaml index 7d52359f..c3ab1698 100644 --- a/.github/workflows/bump_docs.yaml +++ b/.github/workflows/bump_docs.yaml @@ -17,7 +17,7 @@ jobs: run: | bash .github/bump_docs.sh ${{ matrix.repository }} - name: Create Pull Request - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI From 0a6956b029593dd89ba8dfd8241e01c26d19b887 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:35:27 +0000 Subject: [PATCH 0022/2648] build(deps): bump actions/cache from 3 to 4 (#2006) Bumps [actions/cache](https://github.com/actions/cache) from 3 to 4. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 269a10c5..8341a188 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -55,7 +55,7 @@ jobs: sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} - name: Cache grpc id: cache-grpc - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: grpc key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28a221bb..02093b3f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -92,7 +92,7 @@ jobs: GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build - name: Cache grpc id: cache-grpc - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: grpc key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} From b606c7b7680d5592e29228daa133d88b16fbae19 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:44:02 +0000 Subject: [PATCH 0023/2648] build(deps): bump actions/upload-artifact from 3 to 4 (#2007) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 8341a188..8198fb3d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -82,7 +82,7 @@ jobs: else STATIC=true make dist fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.build }} path: release/ @@ -111,7 +111,7 @@ jobs: run: | make backend-assets/grpc/stablediffusion mkdir -p release && cp backend-assets/grpc/stablediffusion release - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: stablediffusion path: release/ @@ -154,7 +154,7 @@ jobs: export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include make dist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: ${{ matrix.build }} path: release/ From fce606fc0f2a116b25dc51fa51118a6642ba34b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:41:58 +0000 Subject: [PATCH 0024/2648] build(deps): bump github.com/charmbracelet/glamour from 0.6.0 to 0.7.0 (#2004) Bumps [github.com/charmbracelet/glamour](https://github.com/charmbracelet/glamour) from 0.6.0 to 0.7.0. - [Release notes](https://github.com/charmbracelet/glamour/releases) - [Commits](https://github.com/charmbracelet/glamour/compare/v0.6.0...v0.7.0) --- updated-dependencies: - dependency-name: github.com/charmbracelet/glamour dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 12 ++++++------ go.sum | 30 +++++++++++++----------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/go.mod b/go.mod index 238e150f..081c25d6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.21 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf github.com/Masterminds/sprig/v3 v3.2.3 - github.com/charmbracelet/glamour v0.6.0 + github.com/charmbracelet/glamour v0.7.0 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e @@ -65,8 +65,8 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect - github.com/alecthomas/chroma v0.10.0 // indirect - github.com/aymanbagabas/go-osc52 v1.0.3 // indirect + github.com/alecthomas/chroma/v2 v2.8.0 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect @@ -104,7 +104,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect github.com/muesli/reflow v0.3.0 // indirect - github.com/muesli/termenv v0.13.0 // indirect + github.com/muesli/termenv v0.15.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect @@ -126,8 +126,8 @@ require ( github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect - github.com/yuin/goldmark v1.5.2 // indirect - github.com/yuin/goldmark-emoji v1.0.1 // indirect + github.com/yuin/goldmark v1.5.4 // indirect + github.com/yuin/goldmark-emoji v1.0.2 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/crypto v0.21.0 // indirect diff --git a/go.sum b/go.sum index c66e9b7c..359bc836 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEV github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= -github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= -github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= +github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264= +github.com/alecthomas/chroma/v2 v2.8.0/go.mod h1:yrkMI9807G1ROx13fhe1v6PN2DDeaR73L3d+1nmYQtw= github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= @@ -26,8 +26,8 @@ github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5 github.com/andybalholm/brotli v1.0.1/go.mod 
h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg= -github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -36,8 +36,8 @@ github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8 github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc= -github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc= +github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= +github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -55,7 +55,6 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0= github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= @@ -212,7 +211,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zk github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= -github.com/microcosm-cc/bluemonday v1.0.21/go.mod h1:ytNkv4RrDrLJ2pqlsSI46O6IVXmZOBBD4SaJyDwwTkM= github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58= github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= @@ -235,8 +233,8 @@ github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6 github.com/mudler/go-stable-diffusion 
v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= -github.com/muesli/termenv v0.13.0 h1:wK20DRpJdDX8b7Ek2QfhvqhRQFZ237RGRO0RQ/Iqdy0= -github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc= +github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= +github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= @@ -330,7 +328,6 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= @@ -371,11 +368,12 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU= -github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os= -github.com/yuin/goldmark-emoji v1.0.1/go.mod h1:2w1E6FEWLcDQkoTE+7HU6QF1F6SLlNGjRIBbIZQFqkQ= +github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU= +github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s= +github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY= github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= @@ -413,7 +411,6 @@ golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= @@ -450,7 +447,6 @@ golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= From b1a242251ca252127f830ae20de9d55d40c21e4c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 18:26:03 +0200 Subject: [PATCH 0025/2648] ci: fixup upload artifact name Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 8198fb3d..a69a2b05 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -84,7 +84,7 @@ jobs: fi - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.build }} + name: LocalAI-linux-${{ matrix.build }} path: release/ - name: Release uses: softprops/action-gh-release@v1 @@ -115,12 +115,6 @@ jobs: with: name: stablediffusion path: release/ - - name: Release - uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* build-macOS: strategy: @@ -156,7 +150,7 @@ jobs: make dist - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.build }} + name: LocalAI-MacOS-${{ matrix.build }} path: release/ - name: Release uses: softprops/action-gh-release@v1 From cbda06fb96661e7c9386ccca1c6dcaf652083a70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:52:54 +0000 Subject: [PATCH 0026/2648] build(deps): bump github.com/gofiber/fiber/v2 from 2.52.0 to 2.52.4 (#2008) Bumps [github.com/gofiber/fiber/v2](https://github.com/gofiber/fiber) from 2.52.0 to 2.52.4. - [Release notes](https://github.com/gofiber/fiber/releases) - [Commits](https://github.com/gofiber/fiber/compare/v2.52.0...v2.52.4) --- updated-dependencies: - dependency-name: github.com/gofiber/fiber/v2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 081c25d6..298f2d69 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 - github.com/gofiber/fiber/v2 v2.52.1 + github.com/gofiber/fiber/v2 v2.52.4 github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 github.com/google/uuid v1.5.0 diff --git a/go.sum b/go.sum index 359bc836..551dd922 100644 --- a/go.sum +++ b/go.sum @@ -109,8 +109,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEe github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.52.1 h1:1RoU2NS+b98o1L77sdl5mboGPiW+0Ypsi5oLmcYlgHI= -github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM= +github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg= github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc= From 12c0d9443ecfa7367e041b900a243e0c77726dce Mon Sep 17 00:00:00 2001 From: Ludovic Leroux Date: Thu, 11 Apr 2024 13:20:22 -0400 Subject: [PATCH 0027/2648] feat: use tokenizer.apply_chat_template() in vLLM (#1990) Use tokenizer.apply_chat_template() in vLLM Signed-off-by: Ludovic LEROUX --- backend/backend.proto | 7 + backend/python/autogptq/backend_pb2.py | 98 ++- backend/python/autogptq/backend_pb2_grpc.py | 132 +++ backend/python/bark/backend_pb2.py | 98 ++- backend/python/bark/backend_pb2_grpc.py | 132 +++ backend/python/coqui/backend_pb2.py | 98 ++- backend/python/coqui/backend_pb2_grpc.py | 132 +++ backend/python/diffusers/backend_pb2.py | 98 ++- backend/python/diffusers/backend_pb2_grpc.py | 132 +++ backend/python/exllama/backend_pb2.py | 98 ++- backend/python/exllama/backend_pb2_grpc.py | 132 +++ backend/python/exllama2/backend_pb2.py | 98 ++- backend/python/exllama2/backend_pb2_grpc.py | 132 +++ backend/python/mamba/backend_pb2.py | 98 ++- backend/python/mamba/backend_pb2_grpc.py | 132 +++ backend/python/petals/backend_pb2.py | 98 ++- backend/python/petals/backend_pb2_grpc.py | 132 +++ .../sentencetransformers/backend_pb2.py | 98 ++- .../sentencetransformers/backend_pb2_grpc.py | 132 +++ .../transformers-musicgen/backend_pb2.py | 98 ++- .../transformers-musicgen/backend_pb2_grpc.py | 132 +++ backend/python/transformers/backend_pb2.py | 98 ++- .../python/transformers/backend_pb2_grpc.py | 132 +++ backend/python/vall-e-x/backend_pb2.py | 98 ++- backend/python/vall-e-x/backend_pb2_grpc.py | 132 +++ backend/python/vllm/backend_pb2.py | 98 ++- backend/python/vllm/backend_pb2_grpc.py | 132 +++ backend/python/vllm/backend_vllm.py | 23 +- core/backend/llm.go | 25 +- core/config/backend_config.go | 11 +- core/http/endpoints/openai/chat.go | 256 +++--- 
core/http/endpoints/openai/inference.go | 2 +- pkg/grpc/proto/backend.pb.go | 761 ++++++++++-------- pkg/grpc/proto/backend_grpc.pb.go | 2 +- 34 files changed, 3088 insertions(+), 989 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index c3d3180b..56d919ef 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -107,6 +107,8 @@ message PredictOptions { string NegativePrompt = 40; int32 NDraft = 41; repeated string Images = 42; + bool UseTokenizerTemplate = 43; + repeated Message Messages = 44; } // The response message containing the result @@ -256,3 +258,8 @@ message StatusResponse { State state = 1; MemoryUsageData memory = 2; } + +message Message { + string role = 1; + string content = 2; +} \ No newline at end of file diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/autogptq/backend_pb2.py +++ b/backend/python/autogptq/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
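For context on PATCH 0027 above: the new `UseTokenizerTemplate` flag and repeated `Messages` field on `PredictOptions`, together with the `Message {role, content}` type, let a Python backend hand the raw chat turns to the model's own chat template instead of rendering the prompt with a LocalAI-side template in Go. A minimal sketch of the idea, separate from the patch itself; the model id below is an illustrative assumption, not something pinned by this change:

    from transformers import AutoTokenizer

    # Illustrative model id only; the patch does not pin any model.
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

    # Mirrors the repeated `backend.Message` entries carried by PredictOptions.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Why is the sky blue?"},
    ]

    # With UseTokenizerTemplate set, the backend can render the prompt with
    # the tokenizer's built-in chat template rather than a server-side one.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)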
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/autogptq/backend_pb2_grpc.py +++ b/backend/python/autogptq/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, 
context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/bark/backend_pb2.py +++ b/backend/python/bark/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
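The Stores* RPCs added to every generated stub in this patch (StoresSet, StoresDelete, StoresGet, StoresFind) describe a simple vector store keyed by float vectors. A hedged client-side sketch against the generated Python bindings follows; the endpoint address is an assumption, and the per-backend servicers shown here only register unimplemented stubs, so this only works against a backend that actually implements the store:

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    # Address is an illustrative assumption; LocalAI normally assigns the
    # backend's gRPC address at model-load time rather than a fixed port.
    channel = grpc.insecure_channel("127.0.0.1:50051")
    stub = backend_pb2_grpc.BackendStub(channel)

    # Store one value under a float-vector key (field names follow the
    # serialized descriptor: StoresKey.Floats, StoresValue.Bytes).
    stub.StoresSet(backend_pb2.StoresSetOptions(
        Keys=[backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])],
        Values=[backend_pb2.StoresValue(Bytes=b"hello world")],
    ))

    # Nearest-neighbour lookup; StoresFindResult carries Keys, Values
    # and a parallel Similarities list.
    found = stub.StoresFind(backend_pb2.StoresFindOptions(
        Key=backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.31]),
        TopK=1,
    ))
    print(found.Values[0].Bytes, found.Similarities[0])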
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/bark/backend_pb2_grpc.py
+++ b/backend/python/bark/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+            backend__pb2.StoresGetResult.FromString,
options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/coqui/backend_pb2.py +++ b/backend/python/coqui/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/coqui/backend_pb2_grpc.py
+++ b/backend/python/coqui/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/diffusers/backend_pb2.py +++ b/backend/python/diffusers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/diffusers/backend_pb2_grpc.py
+++ b/backend/python/diffusers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/exllama/backend_pb2.py +++ b/backend/python/exllama/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama/backend_pb2_grpc.py
+++ b/backend/python/exllama/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                    request_deserializer=backend__pb2.HealthMessage.FromString,
                    response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/exllama2/backend_pb2.py +++ b/backend/python/exllama2/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/exllama2/backend_pb2_grpc.py
+++ b/backend/python/exllama2/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                    request_deserializer=backend__pb2.HealthMessage.FromString,
                    response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/mamba/backend_pb2.py +++ b/backend/python/mamba/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
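# For readability: the message types that the regenerated descriptor above
# encodes, rendered back into proto3 syntax. This is a reconstruction from the
# encoded field tags, not part of the generated file; the canonical
# definitions live in the repository's backend.proto.
#
#   message StoresKey           { repeated float Floats = 1; }
#   message StoresValue         { bytes Bytes = 1; }
#   message StoresSetOptions    { repeated StoresKey Keys = 1; repeated StoresValue Values = 2; }
#   message StoresDeleteOptions { repeated StoresKey Keys = 1; }
#   message StoresGetOptions    { repeated StoresKey Keys = 1; }
#   message StoresGetResult     { repeated StoresKey Keys = 1; repeated StoresValue Values = 2; }
#   message StoresFindOptions   { StoresKey Key = 1; int32 TopK = 2; }
#   message StoresFindResult    { repeated StoresKey Keys = 1; repeated StoresValue Values = 2; repeated float Similarities = 3; }
#   message Message             { string role = 1; string content = 2; }
#
# PredictOptions additionally gains `bool UseTokenizerTemplate = 43` and
# `repeated Message Messages = 44`, and the Backend service gains four unary
# RPCs: StoresSet, StoresDelete, StoresGet and StoresFind.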
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/mamba/backend_pb2_grpc.py
+++ b/backend/python/mamba/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
             request_deserializer=backend__pb2.HealthMessage.FromString,
             response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/petals/backend_pb2.py +++ b/backend/python/petals/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
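# A minimal client-side sketch of what the regenerated stubs enable, assuming
# the generated backend_pb2*.py modules are importable and a backend is
# serving on localhost:50051 (the address and the toy 3-float vectors are
# illustrative, not part of this patch):
#
#   import grpc
#   import backend_pb2
#   import backend_pb2_grpc
#
#   channel = grpc.insecure_channel("localhost:50051")
#   stub = backend_pb2_grpc.BackendStub(channel)
#
#   # Store two vectors together with opaque byte payloads.
#   stub.StoresSet(backend_pb2.StoresSetOptions(
#       Keys=[backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3]),
#             backend_pb2.StoresKey(Floats=[0.9, 0.8, 0.7])],
#       Values=[backend_pb2.StoresValue(Bytes=b"first"),
#               backend_pb2.StoresValue(Bytes=b"second")]))
#
#   # Fetch the TopK stored entries most similar to a query vector.
#   res = stub.StoresFind(backend_pb2.StoresFindOptions(
#       Key=backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.25]), TopK=1))
#   for key, value, sim in zip(res.Keys, res.Values, res.Similarities):
#       print(list(key.Floats), value.Bytes, sim)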
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/petals/backend_pb2_grpc.py
+++ b/backend/python/petals/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )


 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
             request_deserializer=backend__pb2.HealthMessage.FromString,
             response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
             backend__pb2.StatusResponse.FromString,
             options, channel_credentials,
             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+            backend__pb2.StoresGetOptions.SerializeToString,
+
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/sentencetransformers/backend_pb2.py +++ b/backend/python/sentencetransformers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/sentencetransformers/backend_pb2_grpc.py
+++ b/backend/python/sentencetransformers/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
                 backend__pb2.StatusResponse.FromString,
                 options, channel_credentials,
                 insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+
backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ b/backend/python/transformers-musicgen/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None
-  _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001'
-  _globals['_HEALTHMESSAGE']._serialized_start=26
-  _globals['_HEALTHMESSAGE']._serialized_end=41
-  _globals['_PREDICTOPTIONS']._serialized_start=44
-  _globals['_PREDICTOPTIONS']._serialized_end=850
-  _globals['_REPLY']._serialized_start=852
-  _globals['_REPLY']._serialized_end=876
-  _globals['_MODELOPTIONS']._serialized_start=879
-  _globals['_MODELOPTIONS']._serialized_end=1951
-  _globals['_RESULT']._serialized_start=1953
-  _globals['_RESULT']._serialized_end=1995
-  _globals['_EMBEDDINGRESULT']._serialized_start=1997
-  _globals['_EMBEDDINGRESULT']._serialized_end=2034
-  _globals['_TRANSCRIPTREQUEST']._serialized_start=2036
-  _globals['_TRANSCRIPTREQUEST']._serialized_end=2103
-  _globals['_TRANSCRIPTRESULT']._serialized_start=2105
-  _globals['_TRANSCRIPTRESULT']._serialized_end=2183
-  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185
-  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274
-  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277
-  _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492
-  _globals['_TTSREQUEST']._serialized_start=2494
-  _globals['_TTSREQUEST']._serialized_end=2563
-  _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565
-  _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619
-  _globals['_MEMORYUSAGEDATA']._serialized_start=2622
-  _globals['_MEMORYUSAGEDATA']._serialized_end=2764
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716
-  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764
-  _globals['_STATUSRESPONSE']._serialized_start=2767
-  _globals['_STATUSRESPONSE']._serialized_end=2940
-  _globals['_STATUSRESPONSE_STATE']._serialized_start=2873
-  _globals['_STATUSRESPONSE_STATE']._serialized_end=2940
-  _globals['_BACKEND']._serialized_start=2943
-  _globals['_BACKEND']._serialized_end=3571
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001'
+  _globals['_STORESKEY']._serialized_start=26
+  _globals['_STORESKEY']._serialized_end=53
+  _globals['_STORESVALUE']._serialized_start=55
+  _globals['_STORESVALUE']._serialized_end=83
+  _globals['_STORESSETOPTIONS']._serialized_start=85
+  _globals['_STORESSETOPTIONS']._serialized_end=175
+  _globals['_STORESDELETEOPTIONS']._serialized_start=177
+  _globals['_STORESDELETEOPTIONS']._serialized_end=232
+  _globals['_STORESGETOPTIONS']._serialized_start=234
+  _globals['_STORESGETOPTIONS']._serialized_end=286
+  _globals['_STORESGETRESULT']._serialized_start=288
+  _globals['_STORESGETRESULT']._serialized_end=377
+  _globals['_STORESFINDOPTIONS']._serialized_start=379
+  _globals['_STORESFINDOPTIONS']._serialized_end=445
+  _globals['_STORESFINDRESULT']._serialized_start=447
+  _globals['_STORESFINDRESULT']._serialized_end=559
+  _globals['_HEALTHMESSAGE']._serialized_start=561
+  _globals['_HEALTHMESSAGE']._serialized_end=576
+  _globals['_PREDICTOPTIONS']._serialized_start=579
+  _globals['_PREDICTOPTIONS']._serialized_end=1451
+  _globals['_REPLY']._serialized_start=1453
+  _globals['_REPLY']._serialized_end=1477
+  _globals['_MODELOPTIONS']._serialized_start=1480
+  _globals['_MODELOPTIONS']._serialized_end=2552
+  _globals['_RESULT']._serialized_start=2554
+  _globals['_RESULT']._serialized_end=2596
+  _globals['_EMBEDDINGRESULT']._serialized_start=2598
+  _globals['_EMBEDDINGRESULT']._serialized_end=2635
+  _globals['_TRANSCRIPTREQUEST']._serialized_start=2637
+  _globals['_TRANSCRIPTREQUEST']._serialized_end=2704
+  _globals['_TRANSCRIPTRESULT']._serialized_start=2706
+  _globals['_TRANSCRIPTRESULT']._serialized_end=2784
+  _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786
+  _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875
+  _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878
+  _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093
+  _globals['_TTSREQUEST']._serialized_start=3095
+  _globals['_TTSREQUEST']._serialized_end=3164
+  _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166
+  _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220
+  _globals['_MEMORYUSAGEDATA']._serialized_start=3223
+  _globals['_MEMORYUSAGEDATA']._serialized_end=3365
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317
+  _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365
+  _globals['_STATUSRESPONSE']._serialized_start=3368
+  _globals['_STATUSRESPONSE']._serialized_end=3541
+  _globals['_STATUSRESPONSE_STATE']._serialized_start=3474
+  _globals['_STATUSRESPONSE_STATE']._serialized_end=3541
+  _globals['_MESSAGE']._serialized_start=3543
+  _globals['_MESSAGE']._serialized_end=3583
+  _globals['_BACKEND']._serialized_start=3586
+  _globals['_BACKEND']._serialized_end=4477
 # @@protoc_insertion_point(module_scope)
diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py
index 79a7677f..e06fccf3 100644
--- a/backend/python/transformers-musicgen/backend_pb2_grpc.py
+++ b/backend/python/transformers-musicgen/backend_pb2_grpc.py
@@ -64,6 +64,26 @@ class BackendStub(object):
                 request_serializer=backend__pb2.HealthMessage.SerializeToString,
                 response_deserializer=backend__pb2.StatusResponse.FromString,
                 )
+        self.StoresSet = channel.unary_unary(
+                '/backend.Backend/StoresSet',
+                request_serializer=backend__pb2.StoresSetOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresDelete = channel.unary_unary(
+                '/backend.Backend/StoresDelete',
+                request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.StoresGet = channel.unary_unary(
+                '/backend.Backend/StoresGet',
+                request_serializer=backend__pb2.StoresGetOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresGetResult.FromString,
+                )
+        self.StoresFind = channel.unary_unary(
+                '/backend.Backend/StoresFind',
+                request_serializer=backend__pb2.StoresFindOptions.SerializeToString,
+                response_deserializer=backend__pb2.StoresFindResult.FromString,
+                )
 
 
 class BackendServicer(object):
@@ -129,6 +149,30 @@ class BackendServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')
 
+    def StoresSet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresDelete(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresGet(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StoresFind(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
 
 def add_BackendServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server):
                     request_deserializer=backend__pb2.HealthMessage.FromString,
                     response_serializer=backend__pb2.StatusResponse.SerializeToString,
             ),
+            'StoresSet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresSet,
+                    request_deserializer=backend__pb2.StoresSetOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresDelete': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresDelete,
+                    request_deserializer=backend__pb2.StoresDeleteOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'StoresGet': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresGet,
+                    request_deserializer=backend__pb2.StoresGetOptions.FromString,
+                    response_serializer=backend__pb2.StoresGetResult.SerializeToString,
+            ),
+            'StoresFind': grpc.unary_unary_rpc_method_handler(
+                    servicer.StoresFind,
+                    request_deserializer=backend__pb2.StoresFindOptions.FromString,
+                    response_serializer=backend__pb2.StoresFindResult.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'backend.Backend', rpc_method_handlers)
@@ -361,3 +425,71 @@ class Backend(object):
                 backend__pb2.StatusResponse.FromString,
                 options, channel_credentials,
                 insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresSet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet',
+            backend__pb2.StoresSetOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresDelete(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete',
+            backend__pb2.StoresDeleteOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def StoresGet(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet',
+
backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/transformers/backend_pb2.py +++ b/backend/python/transformers/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/transformers/backend_pb2_grpc.py +++ b/backend/python/transformers/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, 
request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/vall-e-x/backend_pb2.py +++ b/backend/python/vall-e-x/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/vall-e-x/backend_pb2_grpc.py +++ b/backend/python/vall-e-x/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, 
context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + 
backend__pb2.StoresGetResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py index 08b896c7..24b6de3b 100644 --- a/backend/python/vllm/backend_pb2.py +++ b/backend/python/vllm/backend_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: backend.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,49 +14,66 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa6\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x32\xf4\x04\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 
\x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() 
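The regenerated descriptor above adds the vector-store messages (StoresKey, StoresValue, the Set/Delete/Get/Find option types and their results) alongside the existing Predict types. As a rough client-side sketch, explicitly not part of the patch, the new RPCs can be driven through the generated Python stub as below; the target address and the example vectors are invented for illustration:

import grpc
import backend_pb2
import backend_pb2_grpc

def stores_demo(target="localhost:50051"):  # hypothetical backend address
    with grpc.insecure_channel(target) as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        # Store one value under a float-vector key
        key = backend_pb2.StoresKey(Floats=[0.1, 0.2, 0.3])
        value = backend_pb2.StoresValue(Bytes=b"payload")
        res = stub.StoresSet(backend_pb2.StoresSetOptions(Keys=[key], Values=[value]))
        assert res.success, res.message
        # Ask for the TopK nearest keys; StoresFindResult carries parallel
        # Keys/Values/Similarities lists
        found = stub.StoresFind(backend_pb2.StoresFindOptions(Key=key, TopK=1))
        for k, v, sim in zip(found.Keys, found.Values, found.Similarities):
            print(k.Floats, v.Bytes, sim)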
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _MEMORYUSAGEDATA_BREAKDOWNENTRY._options = None - _MEMORYUSAGEDATA_BREAKDOWNENTRY._serialized_options = b'8\001' - _globals['_HEALTHMESSAGE']._serialized_start=26 - _globals['_HEALTHMESSAGE']._serialized_end=41 - _globals['_PREDICTOPTIONS']._serialized_start=44 - _globals['_PREDICTOPTIONS']._serialized_end=850 - _globals['_REPLY']._serialized_start=852 - _globals['_REPLY']._serialized_end=876 - _globals['_MODELOPTIONS']._serialized_start=879 - _globals['_MODELOPTIONS']._serialized_end=1951 - _globals['_RESULT']._serialized_start=1953 - _globals['_RESULT']._serialized_end=1995 - _globals['_EMBEDDINGRESULT']._serialized_start=1997 - _globals['_EMBEDDINGRESULT']._serialized_end=2034 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2036 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2103 - _globals['_TRANSCRIPTRESULT']._serialized_start=2105 - _globals['_TRANSCRIPTRESULT']._serialized_end=2183 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2185 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2274 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2277 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=2492 - _globals['_TTSREQUEST']._serialized_start=2494 - _globals['_TTSREQUEST']._serialized_end=2563 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=2565 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=2619 - _globals['_MEMORYUSAGEDATA']._serialized_start=2622 - _globals['_MEMORYUSAGEDATA']._serialized_end=2764 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=2716 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=2764 - _globals['_STATUSRESPONSE']._serialized_start=2767 - _globals['_STATUSRESPONSE']._serialized_end=2940 - _globals['_STATUSRESPONSE_STATE']._serialized_start=2873 - _globals['_STATUSRESPONSE_STATE']._serialized_end=2940 - _globals['_BACKEND']._serialized_start=2943 - _globals['_BACKEND']._serialized_end=3571 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' + _globals['_STORESKEY']._serialized_start=26 + _globals['_STORESKEY']._serialized_end=53 + _globals['_STORESVALUE']._serialized_start=55 + _globals['_STORESVALUE']._serialized_end=83 + _globals['_STORESSETOPTIONS']._serialized_start=85 + _globals['_STORESSETOPTIONS']._serialized_end=175 + _globals['_STORESDELETEOPTIONS']._serialized_start=177 + _globals['_STORESDELETEOPTIONS']._serialized_end=232 + _globals['_STORESGETOPTIONS']._serialized_start=234 + _globals['_STORESGETOPTIONS']._serialized_end=286 + _globals['_STORESGETRESULT']._serialized_start=288 + _globals['_STORESGETRESULT']._serialized_end=377 + _globals['_STORESFINDOPTIONS']._serialized_start=379 + _globals['_STORESFINDOPTIONS']._serialized_end=445 + _globals['_STORESFINDRESULT']._serialized_start=447 + _globals['_STORESFINDRESULT']._serialized_end=559 + _globals['_HEALTHMESSAGE']._serialized_start=561 + _globals['_HEALTHMESSAGE']._serialized_end=576 + 
_globals['_PREDICTOPTIONS']._serialized_start=579 + _globals['_PREDICTOPTIONS']._serialized_end=1451 + _globals['_REPLY']._serialized_start=1453 + _globals['_REPLY']._serialized_end=1477 + _globals['_MODELOPTIONS']._serialized_start=1480 + _globals['_MODELOPTIONS']._serialized_end=2552 + _globals['_RESULT']._serialized_start=2554 + _globals['_RESULT']._serialized_end=2596 + _globals['_EMBEDDINGRESULT']._serialized_start=2598 + _globals['_EMBEDDINGRESULT']._serialized_end=2635 + _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 + _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 + _globals['_TRANSCRIPTRESULT']._serialized_start=2706 + _globals['_TRANSCRIPTRESULT']._serialized_end=2784 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 + _globals['_TTSREQUEST']._serialized_start=3095 + _globals['_TTSREQUEST']._serialized_end=3164 + _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 + _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 + _globals['_MEMORYUSAGEDATA']._serialized_start=3223 + _globals['_MEMORYUSAGEDATA']._serialized_end=3365 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 + _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 + _globals['_STATUSRESPONSE']._serialized_start=3368 + _globals['_STATUSRESPONSE']._serialized_end=3541 + _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 + _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 + _globals['_MESSAGE']._serialized_start=3543 + _globals['_MESSAGE']._serialized_end=3583 + _globals['_BACKEND']._serialized_start=3586 + _globals['_BACKEND']._serialized_end=4477 # @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py index 79a7677f..e06fccf3 100644 --- a/backend/python/vllm/backend_pb2_grpc.py +++ b/backend/python/vllm/backend_pb2_grpc.py @@ -64,6 +64,26 @@ class BackendStub(object): request_serializer=backend__pb2.HealthMessage.SerializeToString, response_deserializer=backend__pb2.StatusResponse.FromString, ) + self.StoresSet = channel.unary_unary( + '/backend.Backend/StoresSet', + request_serializer=backend__pb2.StoresSetOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresDelete = channel.unary_unary( + '/backend.Backend/StoresDelete', + request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.StoresGet = channel.unary_unary( + '/backend.Backend/StoresGet', + request_serializer=backend__pb2.StoresGetOptions.SerializeToString, + response_deserializer=backend__pb2.StoresGetResult.FromString, + ) + self.StoresFind = channel.unary_unary( + '/backend.Backend/StoresFind', + request_serializer=backend__pb2.StoresFindOptions.SerializeToString, + response_deserializer=backend__pb2.StoresFindResult.FromString, + ) class BackendServicer(object): @@ -129,6 +149,30 @@ class BackendServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StoresSet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresDelete(self, request, context): + """Missing 
associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresGet(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StoresFind(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_BackendServicer_to_server(servicer, server): rpc_method_handlers = { @@ -182,6 +226,26 @@ def add_BackendServicer_to_server(servicer, server): request_deserializer=backend__pb2.HealthMessage.FromString, response_serializer=backend__pb2.StatusResponse.SerializeToString, ), + 'StoresSet': grpc.unary_unary_rpc_method_handler( + servicer.StoresSet, + request_deserializer=backend__pb2.StoresSetOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresDelete': grpc.unary_unary_rpc_method_handler( + servicer.StoresDelete, + request_deserializer=backend__pb2.StoresDeleteOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'StoresGet': grpc.unary_unary_rpc_method_handler( + servicer.StoresGet, + request_deserializer=backend__pb2.StoresGetOptions.FromString, + response_serializer=backend__pb2.StoresGetResult.SerializeToString, + ), + 'StoresFind': grpc.unary_unary_rpc_method_handler( + servicer.StoresFind, + request_deserializer=backend__pb2.StoresFindOptions.FromString, + response_serializer=backend__pb2.StoresFindResult.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'backend.Backend', rpc_method_handlers) @@ -361,3 +425,71 @@ class Backend(object): backend__pb2.StatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresSet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', + backend__pb2.StoresSetOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresDelete(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', + backend__pb2.StoresDeleteOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresGet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', + backend__pb2.StoresGetOptions.SerializeToString, + backend__pb2.StoresGetResult.FromString, + 
options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def StoresFind(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', + backend__pb2.StoresFindOptions.SerializeToString, + backend__pb2.StoresFindResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py index ef5134b8..ff0f0b26 100644 --- a/backend/python/vllm/backend_vllm.py +++ b/backend/python/vllm/backend_vllm.py @@ -14,6 +14,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid +from vllm.transformers_utils.tokenizer import get_tokenizer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -71,7 +72,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): """ return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - def LoadModel(self, request, context): + async def LoadModel(self, request, context): """ Loads a language model. @@ -103,6 +104,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.llm = AsyncLLMEngine.from_engine_args(engine_args) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + + try: + engine_model_config = await self.llm.get_model_config() + self.tokenizer = get_tokenizer( + engine_model_config.tokenizer, + tokenizer_mode=engine_model_config.tokenizer_mode, + trust_remote_code=engine_model_config.trust_remote_code, + truncation_side="left", + ) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(message="Model loaded successfully", success=True) async def Predict(self, request, context): @@ -161,9 +174,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Seed != 0: sampling_params.seed = request.Seed + prompt = request.Prompt + + # If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template + if not request.Prompt and request.UseTokenizerTemplate and request.Messages: + prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) + # Generate text request_id = random_uuid() - outputs = self.llm.generate(request.Prompt, sampling_params, request_id) + outputs = self.llm.generate(prompt, sampling_params, request_id) # Stream the results generated_text = "" diff --git a/core/backend/llm.go b/core/backend/llm.go index d5e14df0..493dc25c 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -2,6 +2,7 @@ package backend import ( "context" + "fmt" "os" "regexp" "strings" @@ -9,9 +10,11 @@ import ( "unicode/utf8" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) @@ -26,7 +29,7 @@ type TokenUsage struct { Completion int } -func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c 
config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { +func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model threads := c.Threads if *threads == 0 && o.Threads != 0 { @@ -71,10 +74,30 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode return nil, err } + var protoMessages []*proto.Message + // if we are using the tokenizer template, we need to convert the messages to proto messages + // unless the prompt has already been tokenized (non-chat endpoints + functions) + if c.TemplateConfig.UseTokenizerTemplate && s == "" { + protoMessages = make([]*proto.Message, len(messages), len(messages)) + for i, message := range messages { + protoMessages[i] = &proto.Message{ + Role: message.Role, + } + switch ct := message.Content.(type) { + case string: + protoMessages[i].Content = ct + default: + return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct) + } + } + } + // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported fn := func() (LLMResponse, error) { opts := gRPCPredictOpts(c, loader.ModelPath) opts.Prompt = s + opts.Messages = protoMessages + opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate opts.Images = images tokenUsage := TokenUsage{} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a90b1c1b..81c92d01 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -165,11 +165,12 @@ type Functions struct { } type TemplateConfig struct { - Chat string `yaml:"chat"` - ChatMessage string `yaml:"chat_message"` - Completion string `yaml:"completion"` - Edit string `yaml:"edit"` - Functions string `yaml:"function"` + Chat string `yaml:"chat"` + ChatMessage string `yaml:"chat_message"` + Completion string `yaml:"completion"` + Edit string `yaml:"edit"` + Functions string `yaml:"function"` + UseTokenizerTemplate bool `yaml:"use_tokenizer_template"` } func (c *BackendConfig) SetFunctionCallString(s string) { diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 871ae6c1..36d1142b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -230,112 +230,154 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup var predInput string - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role + // If we are using the tokenizer template, we don't need to process the messages + // unless we are processing functions + if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" + suppressConfigSystemPrompt := false + mess := []string{} + for 
messageIndex, i := range input.Messages { + var content string + role := i.Role - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := config.Roles[roleFn] + if r != "" { + role = roleFn } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage } - } + r := config.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if config.TemplateConfig.ChatMessage != "" { + chatMessageData := model.ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(input.Messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") } else { - content = fmt.Sprint(r, " ", string(j)) + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage } } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. + if content == "" { + if r != "" { + if contentExists { + content = fmt.Sprint(r, i.StringContent) + } + + if i.FunctionCall != nil { + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) + } } else { - content = string(j) + if contentExists { + content = fmt.Sprint(i.StringContent) + } + if i.FunctionCall != nil { + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) + } + } + // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately + if contentExists && role == "system" { + suppressConfigSystemPrompt = true } } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. - if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } + mess = append(mess, content) + } + + predInput = strings.Join(mess, "\n") + log.Debug().Msgf("Prompt (before templating): %s", predInput) + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Chat != "" && !processFunctions { + templateFile = config.TemplateConfig.Chat + } + + if config.TemplateConfig.Functions != "" && processFunctions { + templateFile = config.TemplateConfig.Functions + } + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + SuppressSystemPrompt: suppressConfigSystemPrompt, + Input: predInput, + Functions: funcs, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. 
We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true + log.Debug().Msgf("Template failed loading: %s", err.Error()) } } - mess = append(mess, content) + log.Debug().Msgf("Prompt (after templating): %s", predInput) + if processFunctions { + log.Debug().Msgf("Grammar: %+v", config.Grammar) + } } - predInput = strings.Join(mess, "\n") - log.Debug().Msgf("Prompt (before templating): %s", predInput) + switch { + case toStream: - if toStream { log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) @@ -343,45 +385,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - } - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !processFunctions { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && processFunctions { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", config.Grammar) - } - - switch { - case toStream: responses := make(chan schema.OpenAIResponse) if !processFunctions { @@ -563,7 +567,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m images = append(images, m.StringImages...) } - predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil) + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) if err != nil { log.Error().Err(err).Msg("model inference failed") return "", err diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go index 5d97d21d..06e784b7 100644 --- a/core/http/endpoints/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -29,7 +29,7 @@ func ComputeChoices( } // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback) + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) if err != nil { return result, backend.TokenUsage{}, err } diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go index cc687577..e9afe196 100644 --- a/pkg/grpc/proto/backend.pb.go +++ b/pkg/grpc/proto/backend.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.26.0 +// protoc v5.26.1 // source: backend.proto package proto @@ -532,47 +532,49 @@ type PredictOptions struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` - Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` - Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` - Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` - TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` - Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` - Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` - NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` - Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` - Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` - F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` - DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` - StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` - IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` - TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` - TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` - FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` - PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` - Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` - MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` - MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` - PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` - LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` - MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` - PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` - PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` - Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` - MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` - PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` - Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` - EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` - Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" 
json:"Embeddings,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` - NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` - NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` - Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` + Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` + Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` + Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` + Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` + TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` + Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` + Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` + NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` + Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` + Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` + F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` + DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` + StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` + IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` + TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` + TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` + FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` + PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` + Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` + MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` + MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` + PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` + LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` + MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` + MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` + PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` + PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` + Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` + MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` + TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` + TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` + PromptCachePath 
string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` + Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` + EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` + Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` + RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` + RopeFreqScale float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` + NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` + NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` + NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` + Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` + UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"` + Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"` } func (x *PredictOptions) Reset() { @@ -894,6 +896,20 @@ func (x *PredictOptions) GetImages() []string { return nil } +func (x *PredictOptions) GetUseTokenizerTemplate() bool { + if x != nil { + return x.UseTokenizerTemplate + } + return false +} + +func (x *PredictOptions) GetMessages() []*Message { + if x != nil { + return x.Messages + } + return nil +} + // The response message containing the result type Reply struct { state protoimpl.MessageState @@ -2080,6 +2096,61 @@ func (x *StatusResponse) GetMemory() *MemoryUsageData { return nil } +type Message struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"` + Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"` +} + +func (x *Message) Reset() { + *x = Message{} + if protoimpl.UnsafeEnabled { + mi := &file_backend_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Message) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Message) ProtoMessage() {} + +func (x *Message) ProtoReflect() protoreflect.Message { + mi := &file_backend_proto_msgTypes[22] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Message.ProtoReflect.Descriptor instead. 
+func (*Message) Descriptor() ([]byte, []int) { + return file_backend_proto_rawDescGZIP(), []int{22} +} + +func (x *Message) GetRole() string { + if x != nil { + return x.Role + } + return "" +} + +func (x *Message) GetContent() string { + if x != nil { + return x.Content + } + return "" +} + var File_backend_proto protoreflect.FileDescriptor var file_backend_proto_rawDesc = []byte{ @@ -2125,7 +2196,7 @@ var file_backend_proto_rawDesc = []byte{ 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, - 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, @@ -2204,257 +2275,267 @@ var file_backend_proto_rawDesc = []byte{ 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, - 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, - 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, - 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, - 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, - 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, - 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, - 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, - 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, - 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, - 0x0a, 0x04, 
0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, - 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, - 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, - 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, - 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, - 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, - 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, - 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, - 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, - 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, - 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, - 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, - 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, - 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, - 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, - 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, - 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, - 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, - 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, - 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, - 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 
0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, - 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, - 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, - 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, - 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1e, - 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x12, 0x1c, - 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, - 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, - 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, - 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, - 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x09, 0x4c, - 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, - 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, - 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, - 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, - 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, - 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, - 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, 0x55, 0x4d, - 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x0a, 0x0f, - 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x18, - 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, - 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x45, 0x6e, - 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x53, 0x77, - 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x53, - 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 
0x0a, 0x0b, 0x4d, 0x61, 0x78, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x4d, - 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, - 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, - 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, - 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, - 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, - 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, 0x61, 0x72, - 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, 0x59, 0x61, - 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, - 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, - 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, - 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, - 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, - 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x79, - 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x22, 0x3c, - 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, - 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, - 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, - 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, - 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, - 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, - 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, - 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 
0x65, 0x6e, - 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, - 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, - 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, - 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, - 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, - 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, - 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, - 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, - 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, - 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, - 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, - 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, 0x12, 0x2a, - 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, - 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, - 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, - 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, - 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, - 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, - 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, - 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x18, 0x02, 
0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xac, - 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, - 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, 0x65, 0x61, - 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x1a, - 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xbc, 0x01, - 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, - 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x45, - 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, 0x09, 0x0a, - 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, - 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x32, 0xfb, 0x06, 0x0a, - 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, - 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, - 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, + 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, + 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, + 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, + 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, + 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, + 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, + 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, + 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, + 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, + 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, + 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, + 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, + 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, + 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, + 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, + 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, + 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, + 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, + 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, + 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, + 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, + 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, + 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, + 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, + 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, + 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, + 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, + 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 
0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, + 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, + 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, + 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, + 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, + 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, + 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, + 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, + 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, + 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, + 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, + 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, + 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, + 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, + 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, + 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, + 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, + 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, + 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, + 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, + 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, + 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, + 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 
0x02, 0x52, + 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, + 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, + 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, + 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, + 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, + 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, + 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, + 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, + 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, + 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, + 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, + 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, + 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, + 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, + 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, + 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, + 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, + 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, + 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, + 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, + 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, + 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, + 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, + 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, + 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, + 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, + 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, + 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, + 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, + 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, + 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, + 0x73, 0x73, 
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, + 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, + 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, + 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, + 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, + 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, + 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, + 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, + 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, + 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, + 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, + 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, + 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, + 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, + 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, + 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, + 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 
0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, + 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, + 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, + 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, + 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, + 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, + 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, + 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, + 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, + 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, + 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, + 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, + 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, + 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, + 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, + 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, + 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, + 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37, + 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, + 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, + 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, + 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, + 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, + 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, + 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, + 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, + 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, + 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, + 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, + 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 
0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, + 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, + 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, - 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, - 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, + 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, + 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39, + 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, + 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, - 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, - 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, - 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, - 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, - 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, - 0x69, 0x70, 
0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, - 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, - 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, - 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, - 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, - 0x73, 0x53, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, - 0x0a, 0x0c, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, - 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, - 0x42, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x12, 0x45, 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, - 0x64, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, - 0x6e, 0x64, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, - 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, - 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, - 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 
0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b, + 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74, + 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, + 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45, + 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62, + 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, + 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, + 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, + 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, + 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -2470,7 +2551,7 @@ func file_backend_proto_rawDescGZIP() []byte { } var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 23) +var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24) var file_backend_proto_goTypes = []interface{}{ (StatusResponse_State)(0), // 0: backend.StatusResponse.State (*StoresKey)(nil), // 1: backend.StoresKey @@ -2495,7 +2576,8 @@ var file_backend_proto_goTypes = []interface{}{ (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData (*StatusResponse)(nil), // 22: backend.StatusResponse - nil, // 23: backend.MemoryUsageData.BreakdownEntry + (*Message)(nil), // 23: backend.Message + nil, // 24: backend.MemoryUsageData.BreakdownEntry } var file_backend_proto_depIdxs = []int32{ 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey @@ -2507,43 +2589,44 @@ var file_backend_proto_depIdxs = []int32{ 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue - 17, // 9: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment - 23, // 10: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry - 0, // 11: backend.StatusResponse.state:type_name -> backend.StatusResponse.State - 21, // 12: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData - 9, // 13: backend.Backend.Health:input_type -> backend.HealthMessage - 10, // 14: backend.Backend.Predict:input_type -> backend.PredictOptions - 12, // 15: backend.Backend.LoadModel:input_type -> 
backend.ModelOptions - 10, // 16: backend.Backend.PredictStream:input_type -> backend.PredictOptions - 10, // 17: backend.Backend.Embedding:input_type -> backend.PredictOptions - 18, // 18: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest - 15, // 19: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest - 19, // 20: backend.Backend.TTS:input_type -> backend.TTSRequest - 10, // 21: backend.Backend.TokenizeString:input_type -> backend.PredictOptions - 9, // 22: backend.Backend.Status:input_type -> backend.HealthMessage - 3, // 23: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions - 4, // 24: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions - 5, // 25: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions - 7, // 26: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions - 11, // 27: backend.Backend.Health:output_type -> backend.Reply - 11, // 28: backend.Backend.Predict:output_type -> backend.Reply - 13, // 29: backend.Backend.LoadModel:output_type -> backend.Result - 11, // 30: backend.Backend.PredictStream:output_type -> backend.Reply - 14, // 31: backend.Backend.Embedding:output_type -> backend.EmbeddingResult - 13, // 32: backend.Backend.GenerateImage:output_type -> backend.Result - 16, // 33: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult - 13, // 34: backend.Backend.TTS:output_type -> backend.Result - 20, // 35: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse - 22, // 36: backend.Backend.Status:output_type -> backend.StatusResponse - 13, // 37: backend.Backend.StoresSet:output_type -> backend.Result - 13, // 38: backend.Backend.StoresDelete:output_type -> backend.Result - 6, // 39: backend.Backend.StoresGet:output_type -> backend.StoresGetResult - 8, // 40: backend.Backend.StoresFind:output_type -> backend.StoresFindResult - 27, // [27:41] is the sub-list for method output_type - 13, // [13:27] is the sub-list for method input_type - 13, // [13:13] is the sub-list for extension type_name - 13, // [13:13] is the sub-list for extension extendee - 0, // [0:13] is the sub-list for field type_name + 23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message + 17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment + 24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry + 0, // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State + 21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData + 9, // 14: backend.Backend.Health:input_type -> backend.HealthMessage + 10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions + 12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions + 10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions + 10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions + 18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest + 15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest + 19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest + 10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions + 9, // 23: backend.Backend.Status:input_type -> backend.HealthMessage + 3, // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions + 4, // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions + 5, // 26: 
backend.Backend.StoresGet:input_type -> backend.StoresGetOptions + 7, // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions + 11, // 28: backend.Backend.Health:output_type -> backend.Reply + 11, // 29: backend.Backend.Predict:output_type -> backend.Reply + 13, // 30: backend.Backend.LoadModel:output_type -> backend.Result + 11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply + 14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult + 13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result + 16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult + 13, // 35: backend.Backend.TTS:output_type -> backend.Result + 20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse + 22, // 37: backend.Backend.Status:output_type -> backend.StatusResponse + 13, // 38: backend.Backend.StoresSet:output_type -> backend.Result + 13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result + 6, // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult + 8, // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult + 28, // [28:42] is the sub-list for method output_type + 14, // [14:28] is the sub-list for method input_type + 14, // [14:14] is the sub-list for extension type_name + 14, // [14:14] is the sub-list for extension extendee + 0, // [0:14] is the sub-list for field type_name } func init() { file_backend_proto_init() } @@ -2816,6 +2899,18 @@ func file_backend_proto_init() { return nil } } + file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Message); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -2823,7 +2918,7 @@ func file_backend_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_backend_proto_rawDesc, NumEnums: 1, - NumMessages: 23, + NumMessages: 24, NumExtensions: 0, NumServices: 1, }, diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go index 0314cd4e..a1f442e0 100644 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ b/pkg/grpc/proto/backend_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
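Decoded, the raw-descriptor churn in the two regenerated files above comes down to a small schema change: `backend.proto` gains a chat-style `Message` type with string `role` and `content` fields, `PredictOptions` picks up a `bool UseTokenizerTemplate` (field 43) and a `repeated Message Messages` (field 44), and `Reply.message` is emitted as `bytes`. As a hedged sketch only — the import path and type names come from the descriptor and dependency indexes above, while the address and prompt are invented for illustration — using the regenerated stubs from Go could look like this:

```go
package main

import (
	"context"
	"fmt"
	"log"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Backends are normally spawned and dialed by LocalAI itself; the fixed
	// loopback address here is purely illustrative.
	conn, err := grpc.Dial("127.0.0.1:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)

	// PredictOptions now carries structured chat messages (field 44) and a
	// UseTokenizerTemplate flag (field 43), per the regenerated descriptor.
	reply, err := client.Predict(context.Background(), &pb.PredictOptions{
		UseTokenizerTemplate: true,
		Messages: []*pb.Message{
			{Role: "user", Content: "Hello!"},
		},
	})
	if err != nil {
		log.Fatal(err)
	}
	// Reply.message is declared as bytes in this descriptor.
	fmt.Println(string(reply.Message))
}
```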
// versions: // - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc v5.26.1 // source: backend.proto package proto From 099bd54ff21311a17f9a33f80d83da6aaa0bc524 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 19:22:30 +0200 Subject: [PATCH 0028/2648] ci: try to build on macos14 (#2011) * ci: try to build on macos14 Signed-off-by: Ettore Di Giacinto * ci: fixup artifact name Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a69a2b05..3c1cea44 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -158,3 +158,47 @@ jobs: with: files: | release/* + + + build-macOS-arm64: + strategy: + matrix: + include: + - build: 'avx2' + defines: '' + - build: 'avx' + defines: '-DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_AVX512=ON' + runs-on: macos-14 + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v5 + with: + go-version: '1.21.x' + cache: false + - name: Dependencies + run: | + brew install protobuf grpc + - name: Build + id: build + env: + CMAKE_ARGS: "${{ matrix.defines }}" + BUILD_ID: "${{ matrix.build }}" + run: | + export C_INCLUDE_PATH=/usr/local/include + export CPLUS_INCLUDE_PATH=/usr/local/include + make dist + - uses: actions/upload-artifact@v4 + with: + name: LocalAI-MacOS-arm64-${{ matrix.build }} + path: release/ + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* From 70c4f110a49fd2f5f0f216932171f3dd0ae0d443 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 11 Apr 2024 20:18:05 +0200 Subject: [PATCH 0029/2648] Update overview.md --- docs/content/docs/overview.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 3c3a397d..6aede1d6 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -67,9 +67,9 @@ Start the image with Docker to have a functional clone of OpenAI! 🚀: docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu # Do you have a Nvidia GPUs? Use this instead # CUDA 11 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-11 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11 # CUDA 12 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-cuda-12 +# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` See the [💻 Quickstart](https://localai.io/basics/getting_started/) for all the options and way you can run LocalAI! From da82ce81b5dd139932fb3a8e8fd565f36be79d38 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 18:57:33 +0000 Subject: [PATCH 0030/2648] build(deps): bump github.com/opencontainers/runc from 1.1.5 to 1.1.12 (#2000) Bumps [github.com/opencontainers/runc](https://github.com/opencontainers/runc) from 1.1.5 to 1.1.12. 
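Because runc is only an indirect dependency here, the bump surfaces solely as go.mod/go.sum churn below. A quick way to confirm which version actually ends up linked into a built binary is `go version -m local-ai`; the equivalent check in code — a minimal, standard-library-only sketch, nothing LocalAI-specific — reads:

```go
package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// ReadBuildInfo exposes the module table embedded in the running binary.
	info, ok := debug.ReadBuildInfo()
	if !ok {
		fmt.Println("binary was built without module information")
		return
	}
	for _, dep := range info.Deps {
		// After this bump the reported version should be v1.1.12.
		if dep.Path == "github.com/opencontainers/runc" {
			fmt.Printf("%s %s\n", dep.Path, dep.Version)
		}
	}
}
```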
- [Release notes](https://github.com/opencontainers/runc/releases) - [Changelog](https://github.com/opencontainers/runc/blob/main/CHANGELOG.md) - [Commits](https://github.com/opencontainers/runc/compare/v1.1.5...v1.1.12) --- updated-dependencies: - dependency-name: github.com/opencontainers/runc dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 36 ++---------------------------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 298f2d69..99af8ce7 100644 --- a/go.mod +++ b/go.mod @@ -109,7 +109,7 @@ require ( github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect - github.com/opencontainers/runc v1.1.5 // indirect + github.com/opencontainers/runc v1.1.12 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect diff --git a/go.sum b/go.sum index 551dd922..a421e79c 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,5 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI= @@ -38,20 +37,14 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= -github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= -github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= 
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -70,7 +63,6 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -108,7 +100,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM= github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -137,7 +128,6 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -180,11 +170,8 @@ github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 
h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394= @@ -221,10 +208,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk= github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= -github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -259,10 +244,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs= -github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= +github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4= github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= @@ -300,12 +283,10 @@ github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4= github.com/shirou/gopsutil/v3 v3.23.9 
h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E= github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA= @@ -315,7 +296,6 @@ github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= @@ -337,7 +317,6 @@ github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM= github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= -github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= @@ -349,15 +328,12 @@ github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= @@ -407,7 +383,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -425,12 +400,10 @@ golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -439,12 +412,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -494,7 +463,6 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From b2785ff06e3eb7c1d62a6c3921ae706d58c054dd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 00:49:23 +0200 Subject: [PATCH 0031/2648] feat(gallery): support ConfigURLs (#2012) Signed-off-by: Ettore Di Giacinto --- core/http/api_test.go | 24 +++++++++++++++++++++ core/http/endpoints/localai/gallery.go | 4 +++- core/services/gallery.go | 5 +++++ docs/content/docs/features/model-gallery.md | 10 ++++++--- pkg/gallery/op.go | 1 + 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/core/http/api_test.go b/core/http/api_test.go index 804c15fe..1553ed21 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -43,6 +43,7 @@ Can you help rephrasing sentences? type modelApplyRequest struct { ID string `json:"id"` URL string `json:"url"` + ConfigURL string `json:"config_url"` Name string `json:"name"` Overrides map[string]interface{} `json:"overrides"` } @@ -366,6 +367,29 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(content["backend"]).To(Equal("llama")) }) + It("apply models from config", func() { + response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + }) + + Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) + + uuid := response["uuid"].(string) + + Eventually(func() bool { + response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) + return response["processed"].(bool) + }, "360s", "10s").Should(Equal(true)) + + Eventually(func() []string { + models, _ := client.ListModels(context.TODO()) + modelList := []string{} + for _, m := range models.Models { + modelList = append(modelList, m.ID) + } + return modelList + }, "360s", "10s").Should(ContainElements("hermes-2-pro-mistral")) + }) It("apply models without overrides", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 5c295a2a..b693e7c3 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -19,7 +19,8 @@ type ModelGalleryEndpointService struct { } type GalleryModel struct { - ID string `json:"id"` + ID string `json:"id"` + ConfigURL string `json:"config_url"` gallery.GalleryModel } @@ -64,6 +65,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe Id: uuid.String(), GalleryName: input.ID, Galleries: mgs.galleries, + ConfigURL: input.ConfigURL, } return c.JSON(struct { ID string `json:"uuid"` diff --git a/core/services/gallery.go b/core/services/gallery.go index 826f4573..b068abbb 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -9,6 +9,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/startup" 
"github.com/go-skynet/LocalAI/pkg/utils" "gopkg.in/yaml.v2" ) @@ -90,6 +91,9 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader } else { err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) } + } else if op.ConfigURL != "" { + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + err = cl.Preload(g.modelPath) } else { err = prepareModel(g.modelPath, op.Req, cl, progressCallback) } @@ -129,6 +133,7 @@ func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galler utils.ResetDownloadTimers() if r.ID == "" { err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) + } else { if strings.Contains(r.ID, "@") { err = gallery.InstallModelFromGallery( diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md index 0d978122..05d15ef4 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/docs/features/model-gallery.md @@ -146,12 +146,16 @@ In the body of the request you must specify the model configuration file URL (`u ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "" + "config_url": "" }' # or if from a repository curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ "id": "@" }' +# or from a gallery config +curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ + "url": "" + }' ``` An example that installs openllama can be: @@ -159,8 +163,8 @@ An example that installs openllama can be: ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "https://github.com/go-skynet/model-gallery/blob/main/openllama_3b.yaml" - }' + "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml" + }' ``` The API will return a job `uuid` that you can use to track the job progress: diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 873c356d..99796812 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -5,6 +5,7 @@ type GalleryOp struct { Id string Galleries []Gallery GalleryName string + ConfigURL string } type GalleryOpStatus struct { From 677e20756b31ce158b207b246b0ae373f826897e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:49:41 +0200 Subject: [PATCH 0032/2648] :arrow_up: Update ggerganov/llama.cpp (#2014) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2e4f211..e15166a8 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=8228b66dbc16290c5cbd70e80ab47c068e2569d8 +CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From e0dee52a2ab811fccc18f309a6c5fefcb4725448 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:53:43 -0400 Subject: [PATCH 0033/2648] build(deps): bump the pip group across 4 directories with 8 updates (#2017) * build(deps): bump the pip group across 4 directories with 8 updates Bumps the pip group with 
1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain). Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index). Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory: | Package | From | To | | --- | --- | --- | | [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` | | [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` | | [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` | | [idna](https://github.com/kjd/idna) | `3.4` | `3.7` | | [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` | | [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` | Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit). Updates `langchain` from 0.0.234 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `langchain` from 0.0.160 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `llama-index` from 0.6.2 to 0.9.36 - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36) Updates `langchain` from 0.0.159 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `aiohttp` from 3.8.4 to 3.9.2 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2) Updates `certifi` from 2022.12.7 to 2023.7.22 - [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22) Updates `idna` from 3.4 to 3.7 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) Updates `requests` from 2.29.0 to 2.31.0 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0) Updates `urllib3` from 1.26.15 to 1.26.18 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) Updates `streamlit` from 1.26.0 to 1.30.0 - [Release notes](https://github.com/streamlit/streamlit/releases) - [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: llama-index dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: aiohttp dependency-type: 
direct:production dependency-group: pip - dependency-name: certifi dependency-type: direct:production dependency-group: pip - dependency-name: idna dependency-type: direct:production dependency-group: pip - dependency-name: requests dependency-type: direct:production dependency-group: pip - dependency-name: urllib3 dependency-type: direct:production dependency-group: pip - dependency-name: streamlit dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] * Update version.json PR appears stuck on a check, needs any arbitrary commit to run the security check workflow with write permissions. Bumping docs versions to match latest release as said useless change. Signed-off-by: Dave --------- Signed-off-by: dependabot[bot] Signed-off-by: Dave Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dave --- docs/data/version.json | 2 +- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/data/version.json b/docs/data/version.json index 1b6a2161..6a618115 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.3" + "version": "v2.12.4" } diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 7164e011..759c5b03 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.0.234 +langchain==0.1.0 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index b9e649c5..cdf466b9 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.160 +langchain==0.1.0 openai==0.27.6 chromadb==0.3.21 -llama-index==0.6.2 \ No newline at end of file +llama-index==0.9.36 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 2de5bcf0..1e63b0bf 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.8.4 +aiohttp==3.9.2 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.4 -langchain==0.0.159 +idna==3.7 +langchain==0.1.0 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.29.0 +requests==2.31.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.15 +urllib3==1.26.18 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index ae527c76..1fcd5093 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.26.0 +streamlit==1.30.0 requests \ No newline at end of file From 7e52c8e21ad3ee054444f90d5b16fd49e3f411b9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:27:40 +0200 Subject: [PATCH 0035/2648] Update CONTRIBUTING.md 
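This aligns the quickstart with the current build flow (build the binary with `make build`, then run it directly) and documents the AIO e2e test loop. As a quick sanity check of the new quickstart steps, something like the following should work (a minimal sketch; the default port and the `/v1/models` endpoint are assumed from the documentation examples, not from this diff):

```bash
make build                             # compile the local-ai binary as the new quickstart describes
./local-ai &                           # start the server; assumed to listen on :8080 by default
curl http://localhost:8080/v1/models   # hitting a known endpoint confirms the server came up
```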
Signed-off-by: Ettore Di Giacinto --- CONTRIBUTING.md | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0e237ea7..593ad0ed 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to localAI +# Contributing to LocalAI Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines. @@ -29,8 +29,9 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time 1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git` 2. Navigate to the project directory: `cd LocalAI` -3. Install the required dependencies: `make prepare` -4. Run LocalAI: `make run` +3. Install the required dependencies (see https://localai.io/basics/build/#build-localai-locally) +4. Build LocalAI: `make build` +5. Run LocalAI: `./local-ai` ## Contributing @@ -59,14 +60,29 @@ If you find a bug, have a feature request, or encounter any issues, please check `make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed. +### Running AIO tests + +All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly; a typical flow is: + +```bash +# Build the LocalAI docker image +make DOCKER_IMAGE=local-ai docker + +# Build the corresponding AIO image +BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio + +# Run the AIO e2e tests +LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio +``` + ## Documentation -- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website) - +We welcome contributions to the documentation: please open a new PR or create a new issue. The documentation is available under `docs/` at https://github.com/mudler/LocalAI/tree/master/docs + ## Community and Communication - You can reach out via the Github issue tracker. - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) ---- \ No newline at end of file +--- From fb105837bac4b1468db5464ab572bb3ec7e61389 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:37:56 +0200 Subject: [PATCH 0036/2648] Update secscan.yaml Signed-off-by: Ettore Di Giacinto --- .github/workflows/secscan.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 14958070..884b84d5 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -15,13 +15,16 @@ jobs: steps: - name: Checkout Source uses: actions/checkout@v4 + if: ${{ github.actor != 'dependabot[bot]' }} - name: Run Gosec Security Scanner + if: ${{ github.actor != 'dependabot[bot]' }} uses: securego/gosec@master with: # we let the report trigger content trigger a failure using the GitHub Security features. args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file + if: ${{ github.actor != 'dependabot[bot]' }} uses: github/codeql-action/upload-sarif@v2 with: # Path to SARIF file relative to the root of the repository - sarif_file: results.sarif \ No newline at end of file + sarif_file: results.sarif From 18eea9088a866eab14cd3859af13c96653f89c3a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:38:34 +0200 Subject: [PATCH 0037/2648] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 12541d05..22c709e3 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -1,7 +1,6 @@ name: Dependabot auto-merge on: - pull_request_target: - types: [review_requested] +- pull_request_target permissions: contents: write From 69d638268b67afed91b15ae5b124255569589a47 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:57:13 +0200 Subject: [PATCH 0038/2648] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 22c709e3..f9d03a30 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -40,4 +40,4 @@ jobs: run: gh pr merge --auto --merge "$PR_URL" env: PR_URL: ${{github.event.pull_request.html_url}} - GITHUB_TOKEN: ${{secrets.RELEASE_TOKEN}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From 0e549424e782e315ee166efdb1cba77a1a4a750b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 12 Apr 2024 15:59:25 +0200 Subject: [PATCH 0039/2648] Update dependabot_auto.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index f9d03a30..51337d20 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -37,7 +37,7 @@ jobs: - name: Enable auto-merge for Dependabot PRs if: ${{ contains(github.event.pull_request.title, 'bump')}} - run: gh pr merge --auto --merge "$PR_URL" + run: gh pr merge --auto --squash "$PR_URL" env: PR_URL: ${{github.event.pull_request.html_url}} GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From fcb63aed8a969a2419ed593d8facdccf3ab88e5f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:36:46 +0000 Subject: [PATCH 0040/2648] build(deps): bump follow-redirects from 1.15.2 to 1.15.6 in /examples/langchain/langchainjs-localai-example (#2020) build(deps): bump follow-redirects Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.15.2 to 1.15.6. - [Release notes](https://github.com/follow-redirects/follow-redirects/releases) - [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.15.2...v1.15.6) --- updated-dependencies: - dependency-name: follow-redirects dependency-type: indirect ... 
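For anyone verifying the bump locally, one option (assuming npm is available; the directory matches the lockfile changed below) is:

```bash
cd examples/langchain/langchainjs-localai-example
npm ls follow-redirects   # should resolve to 1.15.6 once this lockfile change is applied
```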
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../langchainjs-localai-example/package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/langchain/langchainjs-localai-example/package-lock.json b/examples/langchain/langchainjs-localai-example/package-lock.json index 29e6999f..e0a45539 100644 --- a/examples/langchain/langchainjs-localai-example/package-lock.json +++ b/examples/langchain/langchainjs-localai-example/package-lock.json @@ -369,9 +369,9 @@ } }, "node_modules/follow-redirects": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", - "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", "funding": [ { "type": "individual", @@ -1479,9 +1479,9 @@ "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==" }, "follow-redirects": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", - "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==" }, "form-data": { "version": "4.0.0", From 912d2dccfa63a3a8e6720dda73e30cf8f7d6b944 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 13 Apr 2024 09:13:00 +0200 Subject: [PATCH 0041/2648] :arrow_up: Update ggerganov/llama.cpp (#2024) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e15166a8..0f6d8fd2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a474f50ebb3e10be3371562f75f3f573f1a86b5f +CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a8ebf6f575c502684e9f5118cc99622546f73438 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 13 Apr 2024 02:14:32 -0500 Subject: [PATCH 0042/2648] fix: respect concurrency from parent build parameters when building GRPC (#2023) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/cpp/grpc/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/cpp/grpc/Makefile b/backend/cpp/grpc/Makefile index 6a181794..5308693b 100644 --- a/backend/cpp/grpc/Makefile +++ b/backend/cpp/grpc/Makefile @@ -5,7 +5,6 @@ SYSTEM ?= $(HOST_SYSTEM) TAG_LIB_GRPC?=v1.59.0 GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git GIT_CLONE_DEPTH?=1 -NUM_BUILD_THREADS?=$(shell nproc --ignore=1) INSTALLED_PACKAGES=installed_packages GRPC_REPO=grpc_repo @@ -52,7 +51,7 @@ $(GRPC_REPO): $(GRPC_BUILD): $(GRPC_REPO) mkdir -p $(GRPC_BUILD) - cd 
$(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS} + cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install build: $(INSTALLED_PACKAGES) From 1981154f49437adcbcb9956611aee4809b406947 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 13 Apr 2024 02:37:32 -0500 Subject: [PATCH 0043/2648] fix: dont commit generated files to git (#1993) * fix: initial work towards not committing generated files to the repository Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: improve build docs Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove unused folder from .dockerignore and .gitignore Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: attempt to fix extra backend tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: attempt to fix other tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more test fixes Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: fix apple tests Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more extras tests fixes Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add GOBIN to PATH in docker build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: extra tests and Dockerfile corrections Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove build dependency checks Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add golang protobuf compilers to tests-linux action Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: ensure protogen is run for extra backend installs Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use newer protobuf Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more missing protoc binaries Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: missing dependencies during docker build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: don't install grpc compilers in the final stage if they aren't needed Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: python-grpc-tools in 22.04 repos is too old Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add a couple of extra build dependencies to Makefile Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: unbreak container rebuild functionality Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .dockerignore | 2 + .github/workflows/test-extra.yml | 29 +- .github/workflows/test.yml | 25 +- .gitignore | 5 + Dockerfile | 33 +- Makefile | 144 +- backend/backend_grpc.pb.go | 457 --- backend/python/autogptq/Makefile | 11 +- backend/python/autogptq/backend_pb2.py | 79 - backend/python/autogptq/backend_pb2_grpc.py | 495 --- backend/python/bark/Makefile | 16 +- backend/python/bark/backend_pb2.py | 79 - backend/python/bark/backend_pb2_grpc.py | 495 --- backend/python/coqui/Makefile | 16 +- backend/python/coqui/backend_pb2.py | 79 - backend/python/coqui/backend_pb2_grpc.py | 495 --- backend/python/diffusers/Makefile | 16 +- 
backend/python/diffusers/backend_pb2.py | 79 - backend/python/diffusers/backend_pb2_grpc.py | 495 --- backend/python/exllama/Makefile | 14 +- backend/python/exllama/backend_pb2.py | 79 - backend/python/exllama/backend_pb2_grpc.py | 495 --- backend/python/exllama2/Makefile | 14 +- backend/python/exllama2/backend_pb2.py | 79 - backend/python/exllama2/backend_pb2_grpc.py | 495 --- backend/python/mamba/Makefile | 18 +- backend/python/mamba/backend_pb2.py | 79 - backend/python/mamba/backend_pb2_grpc.py | 495 --- backend/python/petals/Makefile | 16 +- backend/python/petals/backend_pb2.py | 79 - backend/python/petals/backend_pb2_grpc.py | 495 --- backend/python/sentencetransformers/Makefile | 16 +- .../sentencetransformers/backend_pb2.py | 79 - .../sentencetransformers/backend_pb2_grpc.py | 495 --- backend/python/transformers-musicgen/Makefile | 17 +- .../transformers-musicgen/backend_pb2.py | 79 - .../transformers-musicgen/backend_pb2_grpc.py | 495 --- backend/python/transformers/Makefile | 16 +- backend/python/transformers/backend_pb2.py | 79 - .../python/transformers/backend_pb2_grpc.py | 495 --- backend/python/vall-e-x/Makefile | 16 +- backend/python/vall-e-x/backend_pb2.py | 79 - backend/python/vall-e-x/backend_pb2_grpc.py | 495 --- backend/python/vllm/Makefile | 18 +- backend/python/vllm/backend_pb2.py | 79 - backend/python/vllm/backend_pb2_grpc.py | 495 --- docs/content/docs/getting-started/build.md | 18 +- pkg/grpc/proto/backend.pb.go | 2934 ----------------- pkg/grpc/proto/backend_grpc.pb.go | 618 ---- 49 files changed, 381 insertions(+), 11550 deletions(-) delete mode 100644 backend/backend_grpc.pb.go delete mode 100644 backend/python/autogptq/backend_pb2.py delete mode 100644 backend/python/autogptq/backend_pb2_grpc.py delete mode 100644 backend/python/bark/backend_pb2.py delete mode 100644 backend/python/bark/backend_pb2_grpc.py delete mode 100644 backend/python/coqui/backend_pb2.py delete mode 100644 backend/python/coqui/backend_pb2_grpc.py delete mode 100644 backend/python/diffusers/backend_pb2.py delete mode 100644 backend/python/diffusers/backend_pb2_grpc.py delete mode 100644 backend/python/exllama/backend_pb2.py delete mode 100644 backend/python/exllama/backend_pb2_grpc.py delete mode 100644 backend/python/exllama2/backend_pb2.py delete mode 100644 backend/python/exllama2/backend_pb2_grpc.py delete mode 100644 backend/python/mamba/backend_pb2.py delete mode 100644 backend/python/mamba/backend_pb2_grpc.py delete mode 100644 backend/python/petals/backend_pb2.py delete mode 100644 backend/python/petals/backend_pb2_grpc.py delete mode 100644 backend/python/sentencetransformers/backend_pb2.py delete mode 100644 backend/python/sentencetransformers/backend_pb2_grpc.py delete mode 100644 backend/python/transformers-musicgen/backend_pb2.py delete mode 100644 backend/python/transformers-musicgen/backend_pb2_grpc.py delete mode 100644 backend/python/transformers/backend_pb2.py delete mode 100644 backend/python/transformers/backend_pb2_grpc.py delete mode 100644 backend/python/vall-e-x/backend_pb2.py delete mode 100644 backend/python/vall-e-x/backend_pb2_grpc.py delete mode 100644 backend/python/vllm/backend_pb2.py delete mode 100644 backend/python/vllm/backend_pb2_grpc.py delete mode 100644 pkg/grpc/proto/backend.pb.go delete mode 100644 pkg/grpc/proto/backend_grpc.pb.go diff --git a/.dockerignore b/.dockerignore index 97e8aa34..2c394c48 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,6 @@ .idea +.github +.vscode models examples/chatbot-ui/models examples/rwkv/models diff --git 
a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 7689f06d..7705783e 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -32,8 +32,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -61,8 +62,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -90,8 +92,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -120,8 +123,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true @@ -151,8 +155,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true @@ -222,8 +227,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true @@ -254,8 +260,9 @@ jobs: # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] 
https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ # sudo apt-get update && \ # sudo apt-get install -y conda - # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev + # pip install --user grpcio-tools # sudo rm -rfv /usr/bin/conda || true # - name: Test vllm # run: | @@ -280,8 +287,9 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true - name: Test vall-e-x run: | @@ -307,7 +315,8 @@ jobs: sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng + sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip + pip install --user grpcio-tools sudo rm -rfv /usr/bin/conda || true - name: Test coqui diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 02093b3f..46c4e065 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -70,17 +70,27 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg + sudo apt-get install build-essential curl ffmpeg curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ - gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo apt-get update && \ sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch + sudo apt-get install -y ca-certificates cmake patch python3-pip unzip sudo apt-get install -y libopencv-dev - + + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + + # The python3-grpc-tools package in 22.04 is too old + pip install --user grpcio-tools + sudo rm -rfv /usr/bin/conda || true PATH=$PATH:/opt/conda/bin make -C 
backend/python/sentencetransformers @@ -89,7 +99,7 @@ jobs: GO_TAGS="tts" make -C sources/go-piper piper.o && \ sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build - name: Cache grpc id: cache-grpc uses: actions/cache@v4 @@ -108,7 +118,7 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3 @@ -186,7 +196,8 @@ jobs: run: go version - name: Dependencies run: | - brew install protobuf grpc make + brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc + pip install --user grpcio-tools - name: Test run: | export C_INCLUDE_PATH=/usr/local/include diff --git a/.gitignore b/.gitignore index b48f7391..f1f860e9 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,8 @@ backend-assets/* !backend-assets/.keep prepare /ggml-metal.metal + +# Protobuf generated files +*.pb.go +*pb2.py +*pb2_grpc.py diff --git a/Dockerfile b/Dockerfile index 5fb6230c..d0217d50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,12 +20,25 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean + apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean # Install Go RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin +# Install grpc compilers +ENV PATH $PATH:/root/go/bin +RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + +# Install protobuf (the version in 22.04 is too old) +RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + +# Install grpcio-tools (the version in 22.04 is too old) +RUN pip install --user grpcio-tools + COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -68,7 +81,8 @@ RUN test -n "$TARGETARCH" \ FROM requirements-core as requirements-extras -RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ +RUN apt install -y gpg && \ + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \ @@ -100,7 +114,7 @@ ENV MAKEFLAGS=${MAKEFLAGS} WORKDIR /build RUN apt-get update && \ - apt-get install -y g++ cmake git && \ + apt-get install -y 
build-essential cmake git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -133,6 +147,12 @@ WORKDIR /build COPY . . COPY .git . RUN echo "GO_TAGS: $GO_TAGS" + +RUN apt-get update && \ + apt-get install -y build-essential cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + RUN make prepare # If we are building with clblas support, we need the libraries for the builds @@ -191,6 +211,11 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ apt-get clean \ ; fi +RUN apt-get update && \ + apt-get install -y cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + WORKDIR /build # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself @@ -202,7 +227,7 @@ COPY . . COPY --from=builder /build/sources ./sources/ COPY --from=grpc /build/grpc ./grpc/ -RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc +RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc # Copy the binary COPY --from=builder /build/local-ai ./ diff --git a/Makefile b/Makefile index 0f6d8fd2..5932dfb2 100644 --- a/Makefile +++ b/Makefile @@ -289,10 +289,12 @@ clean: ## Remove build related file rm -rf ./sources rm -rf $(BINARY_NAME) rm -rf release/ - rm -rf backend-assets + rm -rf backend-assets/* $(MAKE) -C backend/cpp/grpc clean $(MAKE) -C backend/cpp/llama clean $(MAKE) dropreplace + $(MAKE) protogen-clean + rmdir pkg/grpc/proto || true clean-tests: rm -rf test-models @@ -416,30 +418,136 @@ help: ## Show this help. else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ }' $(MAKEFILE_LIST) +.PHONY: protogen protogen: protogen-go protogen-python +.PHONY: protogen-clean +protogen-clean: protogen-go-clean protogen-python-clean + +.PHONY: protogen-go protogen-go: + mkdir -p pkg/grpc/proto protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ backend/backend.proto -protogen-python: - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ 
--python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto - python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto +.PHONY: protogen-go-clean +protogen-go-clean: + $(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go + $(RM) bin/* + +.PHONY: protogen-python +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen + +.PHONY: protogen-python-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean + +.PHONY: autogptq-protogen +autogptq-protogen: + $(MAKE) -C backend/python/autogptq protogen + +.PHONY: autogptq-protogen-clean +autogptq-protogen-clean: + $(MAKE) -C backend/python/autogptq protogen-clean + +.PHONY: bark-protogen +bark-protogen: + $(MAKE) -C backend/python/bark protogen + +.PHONY: bark-protogen-clean +bark-protogen-clean: + $(MAKE) -C backend/python/bark protogen-clean + +.PHONY: coqui-protogen +coqui-protogen: + $(MAKE) -C backend/python/coqui protogen + +.PHONY: coqui-protogen-clean +coqui-protogen-clean: + $(MAKE) -C backend/python/coqui protogen-clean + +.PHONY: diffusers-protogen +diffusers-protogen: + $(MAKE) -C backend/python/diffusers protogen + +.PHONY: diffusers-protogen-clean +diffusers-protogen-clean: + $(MAKE) -C backend/python/diffusers protogen-clean + +.PHONY: exllama-protogen +exllama-protogen: + $(MAKE) -C backend/python/exllama protogen + +.PHONY: exllama-protogen-clean +exllama-protogen-clean: + $(MAKE) -C backend/python/exllama protogen-clean + +.PHONY: exllama2-protogen +exllama2-protogen: + $(MAKE) -C backend/python/exllama2 protogen + +.PHONY: exllama2-protogen-clean +exllama2-protogen-clean: + $(MAKE) -C backend/python/exllama2 protogen-clean + +.PHONY: mamba-protogen +mamba-protogen: + $(MAKE) -C backend/python/mamba protogen + +.PHONY: mamba-protogen-clean +mamba-protogen-clean: + $(MAKE) -C backend/python/mamba protogen-clean + +.PHONY: petals-protogen +petals-protogen: + $(MAKE) -C backend/python/petals protogen + +.PHONY: petals-protogen-clean +petals-protogen-clean: + $(MAKE) -C backend/python/petals protogen-clean + +.PHONY: sentencetransformers-protogen +sentencetransformers-protogen: + $(MAKE) -C backend/python/sentencetransformers protogen + +.PHONY: sentencetransformers-protogen-clean +sentencetransformers-protogen-clean: + $(MAKE) -C backend/python/sentencetransformers protogen-clean + +.PHONY: transformers-protogen +transformers-protogen: + $(MAKE) -C backend/python/transformers protogen + +.PHONY: transformers-protogen-clean +transformers-protogen-clean: + $(MAKE) -C backend/python/transformers protogen-clean + +.PHONY: transformers-musicgen-protogen +transformers-musicgen-protogen: + $(MAKE) -C backend/python/transformers-musicgen protogen + +.PHONY: transformers-musicgen-protogen-clean +transformers-musicgen-protogen-clean: + $(MAKE) -C 
backend/python/transformers-musicgen protogen-clean + +.PHONY: vall-e-x-protogen +vall-e-x-protogen: + $(MAKE) -C backend/python/vall-e-x protogen + +.PHONY: vall-e-x-protogen-clean +vall-e-x-protogen-clean: + $(MAKE) -C backend/python/vall-e-x protogen-clean + +.PHONY: vllm-protogen +vllm-protogen: + $(MAKE) -C backend/python/vllm protogen + +.PHONY: vllm-protogen-clean +vllm-protogen-clean: + $(MAKE) -C backend/python/vllm protogen-clean ## GRPC # Note: it is duplicated in the Dockerfile -prepare-extra-conda-environments: +prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/autogptq $(MAKE) -C backend/python/bark $(MAKE) -C backend/python/coqui @@ -454,7 +562,7 @@ prepare-extra-conda-environments: $(MAKE) -C backend/python/petals $(MAKE) -C backend/python/exllama2 -prepare-test-extra: +prepare-test-extra: protogen-python $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/diffusers @@ -478,7 +586,7 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/ @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true -backend-assets/grpc: replace +backend-assets/grpc: protogen-go replace mkdir -p backend-assets/grpc backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc diff --git a/backend/backend_grpc.pb.go b/backend/backend_grpc.pb.go deleted file mode 100644 index 5c97691d..00000000 --- a/backend/backend_grpc.pb.go +++ /dev/null @@ -1,457 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.2.0 -// - protoc v4.23.4 -// source: backend/backend.proto - -package proto - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -// BackendClient is the client API for Backend service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
-type BackendClient interface { - Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) - Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) - LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) - PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) - Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) - GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) - AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) - TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) - TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) - Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) -} - -type backendClient struct { - cc grpc.ClientConnInterface -} - -func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { - return &backendClient{cc} -} - -func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...) - if err != nil { - return nil, err - } - x := &backendPredictStreamClient{stream} - if err := x.ClientStream.SendMsg(in); err != nil { - return nil, err - } - if err := x.ClientStream.CloseSend(); err != nil { - return nil, err - } - return x, nil -} - -type Backend_PredictStreamClient interface { - Recv() (*Reply, error) - grpc.ClientStream -} - -type backendPredictStreamClient struct { - grpc.ClientStream -} - -func (x *backendPredictStreamClient) Recv() (*Reply, error) { - m := new(Reply) - if err := x.ClientStream.RecvMsg(m); err != nil { - return nil, err - } - return m, nil -} - -func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { - out := new(EmbeddingResult) - err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...) 
- if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { - out := new(TranscriptResult) - err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { - out := new(TokenizationResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { - out := new(StatusResponse) - err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// BackendServer is the server API for Backend service. -// All implementations must embed UnimplementedBackendServer -// for forward compatibility -type BackendServer interface { - Health(context.Context, *HealthMessage) (*Reply, error) - Predict(context.Context, *PredictOptions) (*Reply, error) - LoadModel(context.Context, *ModelOptions) (*Result, error) - PredictStream(*PredictOptions, Backend_PredictStreamServer) error - Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) - GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) - AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) - TTS(context.Context, *TTSRequest) (*Result, error) - TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) - Status(context.Context, *HealthMessage) (*StatusResponse, error) - mustEmbedUnimplementedBackendServer() -} - -// UnimplementedBackendServer must be embedded to have forward compatible implementations. 
-type UnimplementedBackendServer struct { -} - -func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") -} -func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") -} -func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") -} -func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error { - return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") -} -func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented") -} -func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented") -} -func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented") -} -func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented") -} -func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented") -} -func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") -} -func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} - -// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to BackendServer will -// result in compilation errors. 
-type UnsafeBackendServer interface { - mustEmbedUnimplementedBackendServer() -} - -func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) { - s.RegisterService(&Backend_ServiceDesc, srv) -} - -func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Health(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Health", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Predict(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Predict", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ModelOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).LoadModel(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/LoadModel", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { - m := new(PredictOptions) - if err := stream.RecvMsg(m); err != nil { - return err - } - return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream}) -} - -type Backend_PredictStreamServer interface { - Send(*Reply) error - grpc.ServerStream -} - -type backendPredictStreamServer struct { - grpc.ServerStream -} - -func (x *backendPredictStreamServer) Send(m *Reply) error { - return x.ServerStream.SendMsg(m) -} - -func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Embedding(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Embedding", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(GenerateImageRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).GenerateImage(ctx, in) - } - info := 
&grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/GenerateImage", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TranscriptRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).AudioTranscription(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/AudioTranscription", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TTSRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TTS(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/TTS", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TokenizeString(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/TokenizeString", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Status(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/backend.Backend/Status", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
-// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var Backend_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "backend.Backend", - HandlerType: (*BackendServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Health", - Handler: _Backend_Health_Handler, - }, - { - MethodName: "Predict", - Handler: _Backend_Predict_Handler, - }, - { - MethodName: "LoadModel", - Handler: _Backend_LoadModel_Handler, - }, - { - MethodName: "Embedding", - Handler: _Backend_Embedding_Handler, - }, - { - MethodName: "GenerateImage", - Handler: _Backend_GenerateImage_Handler, - }, - { - MethodName: "AudioTranscription", - Handler: _Backend_AudioTranscription_Handler, - }, - { - MethodName: "TTS", - Handler: _Backend_TTS_Handler, - }, - { - MethodName: "TokenizeString", - Handler: _Backend_TokenizeString_Handler, - }, - { - MethodName: "Status", - Handler: _Backend_Status_Handler, - }, - }, - Streams: []grpc.StreamDesc{ - { - StreamName: "PredictStream", - Handler: _Backend_PredictStream_Handler, - ServerStreams: true, - }, - }, - Metadata: "backend/backend.proto", -} diff --git a/backend/python/autogptq/Makefile b/backend/python/autogptq/Makefile index dfae12c1..eb81f045 100644 --- a/backend/python/autogptq/Makefile +++ b/backend/python/autogptq/Makefile @@ -1,4 +1,13 @@ .PHONY: autogptq -autogptq: +autogptq: protogen $(MAKE) -C ../common-env/transformers +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/autogptq/backend_pb2.py b/backend/python/autogptq/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/autogptq/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/autogptq/backend_pb2_grpc.py b/backend/python/autogptq/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/autogptq/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
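
The hunks above delete the checked-in autogptq stubs and wire a protogen target into that backend's Makefile, so backend_pb2.py and backend_pb2_grpc.py are regenerated on demand from backend/backend.proto instead of being tracked in git. Below is a minimal sketch of how a caller exercises the regenerated stubs; the BackendStub, HealthMessage, and Reply names come from the deleted file above, while the localhost:50051 address and the check_backend helper are hypothetical, for illustration only.

    # sketch.py - assumes `make protogen` has already produced the stubs
    # in the current backend directory (e.g. backend/python/autogptq).
    import grpc

    import backend_pb2
    import backend_pb2_grpc

    def check_backend(address="localhost:50051"):  # hypothetical address
        # Open an insecure channel and issue the unary Health RPC
        # defined in backend.proto.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            reply = stub.Health(backend_pb2.HealthMessage())
        # Reply.message is declared as bytes in this proto.
        return reply.message

    if __name__ == "__main__":
        print(check_backend())

Generating the stubs at build time (and removing them via protogen-clean) keeps them in lockstep with backend/backend.proto, which is presumably why the same Makefile pattern is repeated for every Python backend in the diffs that follow.
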
diff --git a/backend/python/bark/Makefile b/backend/python/bark/Makefile index 68f73b29..a16308f7 100644 --- a/backend/python/bark/Makefile +++ b/backend/python/bark/Makefile @@ -1,15 +1,25 @@ .PHONY: ttsbark -ttsbark: +ttsbark: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running bark..." bash run.sh @echo "bark run." .PHONY: test -test: +test: protogen @echo "Testing bark..." bash test.sh @echo "bark tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/bark/backend_pb2.py b/backend/python/bark/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/bark/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a 
\x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/bark/backend_pb2_grpc.py b/backend/python/bark/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/bark/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
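The checked-in stubs deleted above are now produced at build time instead: each backend Makefile gains a `protogen` target that runs `python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto`, as the coqui and diffusers hunks below show. A minimal client-side sketch of the regenerated API, assuming `make protogen` has been run and a backend server is listening on an illustrative localhost:50051:

# Sketch only: the address is an assumption; the module names are what
# protogen emits next to the Makefile.
import grpc

import backend_pb2
import backend_pb2_grpc

# BackendStub wraps the RPCs declared in backend.proto.
channel = grpc.insecure_channel("localhost:50051")
stub = backend_pb2_grpc.BackendStub(channel)

# Health takes an empty HealthMessage and returns a Reply whose
# `message` field is bytes.
reply = stub.Health(backend_pb2.HealthMessage())
print(reply.message)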
diff --git a/backend/python/coqui/Makefile b/backend/python/coqui/Makefile index e0ec9001..475804c9 100644 --- a/backend/python/coqui/Makefile +++ b/backend/python/coqui/Makefile @@ -1,15 +1,25 @@ .PHONY: coqui -coqui: +coqui: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running coqui..." bash run.sh @echo "coqui run." .PHONY: test -test: +test: protogen @echo "Testing coqui..." bash test.sh @echo "coqui tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/coqui/backend_pb2.py b/backend/python/coqui/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/coqui/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a 
\x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/coqui/backend_pb2_grpc.py b/backend/python/coqui/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/coqui/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
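The server side uses the same regenerated module: a backend subclasses `BackendServicer` and registers itself with the generated `add_BackendServicer_to_server` helper. A minimal sketch, assuming regenerated stubs and an illustrative port; only Health is overridden here, so every other RPC keeps the generated UNIMPLEMENTED behaviour from the base class:

# Sketch only: the port, worker count, class name, and Health body are
# assumptions for illustration.
from concurrent import futures

import grpc

import backend_pb2
import backend_pb2_grpc

class HealthyBackend(backend_pb2_grpc.BackendServicer):
    def Health(self, request, context):
        # Reply.message is declared as bytes in backend.proto.
        return backend_pb2.Reply(message=b"OK")

server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(HealthyBackend(), server)
server.add_insecure_port("[::]:50051")
server.start()
server.wait_for_termination()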
diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 40e1d1a7..c73efdd2 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -12,15 +12,25 @@ export SKIP_CONDA=1 endif .PHONY: diffusers -diffusers: +diffusers: protogen @echo "Installing $(CONDA_ENV_PATH)..." bash install.sh $(CONDA_ENV_PATH) .PHONY: run -run: +run: protogen @echo "Running diffusers..." bash run.sh @echo "Diffusers run." -test: +test: protogen bash test.sh + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/diffusers/backend_pb2.py b/backend/python/diffusers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/diffusers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 
\x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/diffusers/backend_pb2_grpc.py b/backend/python/diffusers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/diffusers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
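Note that the stubs deleted above are no longer checked in: the new `protogen` target regenerates `backend_pb2.py` and `backend_pb2_grpc.py` from `backend.proto` at build time. A minimal client-side smoke test that the regenerated modules behave like the deleted ones, sketched under assumptions (the server address is illustrative, and the generated modules are assumed to be on the import path after `make protogen`):

    # Hypothetical smoke test: import the regenerated stubs and call the
    # unary Health RPC defined in backend.proto. Names (BackendStub, Health,
    # HealthMessage, Reply) match the generated code deleted above.
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    channel = grpc.insecure_channel("localhost:50051")  # address is an assumption
    stub = backend_pb2_grpc.BackendStub(channel)
    reply = stub.Health(backend_pb2.HealthMessage())
    print(reply.message)  # Reply.message is a bytes field in the schema

Because the generated API is deterministic for a fixed backend.proto, callers like this are unaffected by moving generation from the repository into the protogen Makefile target.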
diff --git a/backend/python/exllama/Makefile b/backend/python/exllama/Makefile index b51adf76..15623448 100644 --- a/backend/python/exllama/Makefile +++ b/backend/python/exllama/Makefile @@ -1,11 +1,21 @@ export CONDA_ENV_PATH = "exllama.yml" .PHONY: exllama -exllama: +exllama: protogen bash install.sh ${CONDA_ENV_PATH} .PHONY: run -run: +run: protogen @echo "Running exllama..." bash run.sh @echo "exllama run." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/exllama/backend_pb2.py b/backend/python/exllama/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/exllama/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b 
\x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/exllama/backend_pb2_grpc.py b/backend/python/exllama/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/exllama/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
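The exllama backend above follows the same pattern. For the server side, a minimal sketch of how a backend wires a servicer into the regenerated code, assuming the port and worker count are illustrative and only Health is implemented:

    # Hypothetical minimal server wiring, using the
    # add_BackendServicer_to_server() helper visible in the deleted stubs.
    from concurrent import futures
    import grpc
    import backend_pb2
    import backend_pb2_grpc

    class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
        def Health(self, request, context):
            # Reply.message is bytes per backend.proto.
            return backend_pb2.Reply(message=b"OK")

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
    server.add_insecure_port("[::]:50051")  # port is an assumption
    server.start()
    server.wait_for_termination()

The generated BackendServicer base answers every unimplemented RPC with grpc.StatusCode.UNIMPLEMENTED, as the deleted code shows, so a partial servicer like this one is safe to run while a backend is being brought up.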
diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile index 24158151..6d6776b7 100644 --- a/backend/python/exllama2/Makefile +++ b/backend/python/exllama2/Makefile @@ -1,10 +1,20 @@ .PHONY: exllama2 -exllama2: +exllama2: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running exllama2..." bash run.sh @echo "exllama2 run." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/exllama2/backend_pb2.py b/backend/python/exllama2/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/exllama2/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b 
\x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/exllama2/backend_pb2_grpc.py b/backend/python/exllama2/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/exllama2/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
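The backend_pb2_grpc.py deleted above is the generated glue for the backend.Backend service: BackendStub binds each RPC (Health, Predict, LoadModel, the streaming PredictStream, Embedding, GenerateImage, AudioTranscription, TTS, TokenizeString, Status, and the Stores* calls) to its request and response message types, while BackendServicer is the base class the concrete backends override. A minimal client sketch against the regenerated stubs, assuming a backend listening on a placeholder address, would look like:

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    def check_health(address="localhost:50051"):  # placeholder address
        # Health takes a backend.HealthMessage and returns a backend.Reply.
        with grpc.insecure_channel(address) as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            reply = stub.Health(backend_pb2.HealthMessage())
            return reply.message  # bytes payload of the Reply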
diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile index 3ff00346..ca18e609 100644 --- a/backend/python/mamba/Makefile +++ b/backend/python/mamba/Makefile @@ -1,16 +1,26 @@ .PHONY: mamba -mamba: +mamba: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running mamba..." bash run.sh @echo "mamba run." .PHONY: test -test: +test: protogen @echo "Testing mamba..." bash test.sh - @echo "mamba tested." \ No newline at end of file + @echo "mamba tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/mamba/backend_pb2.py b/backend/python/mamba/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/mamba/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/mamba/backend_pb2_grpc.py b/backend/python/mamba/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/mamba/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
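The mamba hunks above remove the identical generated pair (note the matching blob hash 24b6de3b), so after this series every backend rebuilds the stubs via protogen instead of tracking them in git. On the server side, the generated BackendServicer answers every RPC with UNIMPLEMENTED, so a backend only overrides what it actually supports; a minimal sketch, with a placeholder port, is:

    from concurrent import futures

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    class MinimalBackend(backend_pb2_grpc.BackendServicer):
        # Override only Health; every other RPC keeps the generated
        # UNIMPLEMENTED behaviour from the base class.
        def Health(self, request, context):
            return backend_pb2.Reply(message=b"OK")

    def serve(port=50051):  # placeholder port
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
        backend_pb2_grpc.add_BackendServicer_to_server(MinimalBackend(), server)
        server.add_insecure_port(f"[::]:{port}")
        server.start()
        server.wait_for_termination()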
diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile index aa7778e1..0ed64a07 100644 --- a/backend/python/petals/Makefile +++ b/backend/python/petals/Makefile @@ -1,17 +1,27 @@ .PHONY: petals -petals: +petals: protogen @echo "Creating virtual environment..." bash install.sh "petals.yml" @echo "Virtual environment created." .PHONY: run -run: +run: protogen @echo "Running petals..." bash run.sh @echo "petals run." .PHONY: test -test: +test: protogen @echo "Testing petals..." bash test.sh @echo "petals tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/petals/backend_pb2.py b/backend/python/petals/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/petals/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/petals/backend_pb2_grpc.py b/backend/python/petals/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/petals/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
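Deleting backend_pb2_grpc.py from the tree does not change the client surface: protogen regenerates an identical module, so callers keep using the same BackendStub. As a reference point, a minimal health check against a running backend could look like the following sketch (the address is an assumption for illustration; LocalAI normally assigns the backend's address when it spawns the process):

# Sketch: health-check a backend over the regenerated stubs.
import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("127.0.0.1:50051") as channel:  # address is an assumption
    stub = backend_pb2_grpc.BackendStub(channel)
    reply = stub.Health(backend_pb2.HealthMessage())
    print(reply.message)  # Reply.message is bytes per backend.proto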
diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile index 7dbde5cf..ac442897 100644 --- a/backend/python/sentencetransformers/Makefile +++ b/backend/python/sentencetransformers/Makefile @@ -1,17 +1,27 @@ .PHONY: sentencetransformers -sentencetransformers: +sentencetransformers: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running sentencetransformers..." bash run.sh @echo "sentencetransformers run." # It is not working well by using command line. It only6 works with IDE like VSCode. .PHONY: test -test: +test: protogen @echo "Testing sentencetransformers..." bash test.sh @echo "sentencetransformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend_pb2.py b/backend/python/sentencetransformers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/sentencetransformers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 
\x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/sentencetransformers/backend_pb2_grpc.py b/backend/python/sentencetransformers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/sentencetransformers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by 
the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method 
not implemented!') - raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - 
rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
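The diffs that follow apply one pattern to each Python backend: the checked-in generated stubs (backend_pb2.py and backend_pb2_grpc.py) are deleted, and the backend Makefile gains a protogen target that regenerates them from backend.proto, with the build, run, and test targets made to depend on it. As a rough sketch of what the new Makefile rule does, the same generation can be driven from Python via grpc_tools; the include path, output flags, and proto filename below are taken verbatim from the rule, everything else is illustrative:

    # Regenerate backend_pb2.py and backend_pb2_grpc.py from backend.proto,
    # mirroring the protogen Makefile rule. Requires the grpcio-tools package.
    from grpc_tools import protoc

    protoc.main([
        "grpc_tools.protoc",    # treated as argv[0] (the program name)
        "-I../..",              # backend.proto lives two directories up
        "--python_out=.",       # emits backend_pb2.py (message classes)
        "--grpc_python_out=.",  # emits backend_pb2_grpc.py (stub and servicer)
        "backend.proto",
    ])

Generating at build time keeps the stubs in sync with backend.proto instead of relying on the committed copies that the deletions below remove.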
diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile index a2969d84..e28a356d 100644 --- a/backend/python/transformers-musicgen/Makefile +++ b/backend/python/transformers-musicgen/Makefile @@ -1,16 +1,25 @@ - .PHONY: transformers-musicgen -transformers-musicgen: +transformers-musicgen: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running transformers..." bash run.sh @echo "transformers run." .PHONY: test -test: +test: protogen @echo "Testing transformers..." bash test.sh @echo "transformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/transformers-musicgen/backend_pb2.py b/backend/python/transformers-musicgen/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/transformers-musicgen/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 
\x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers-musicgen/backend_pb2_grpc.py b/backend/python/transformers-musicgen/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/transformers-musicgen/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by 
the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method 
not implemented!') - raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - 
rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
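The BackendServicer base class and the add_BackendServicer_to_server helper deleted just above are the server-side half of the generated module, and make protogen recreates them unchanged. A minimal sketch of how a backend builds on that scaffolding, assuming a hypothetical fixed port (in practice LocalAI passes the address when it spawns the backend process):

    import concurrent.futures

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    class HealthOnlyServicer(backend_pb2_grpc.BackendServicer):
        # Override only Health; the generated base class answers every other
        # RPC with StatusCode.UNIMPLEMENTED, as seen in the deletion above.
        def Health(self, request, context):
            return backend_pb2.Reply(message=b"OK")  # Reply.message is bytes

    server = grpc.server(concurrent.futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(HealthOnlyServicer(), server)
    server.add_insecure_port("127.0.0.1:50051")  # hypothetical address
    server.start()
    server.wait_for_termination()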
diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile index 4eeb9ad5..afe48405 100644 --- a/backend/python/transformers/Makefile +++ b/backend/python/transformers/Makefile @@ -1,16 +1,26 @@ .PHONY: transformers -transformers: +transformers: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running transformers..." bash run.sh @echo "transformers run." # It is not working well by using command line. It only6 works with IDE like VSCode. .PHONY: test -test: +test: protogen @echo "Testing transformers..." bash test.sh @echo "transformers tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/transformers/backend_pb2.py b/backend/python/transformers/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/transformers/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 
\x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/transformers/backend_pb2_grpc.py b/backend/python/transformers/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/transformers/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol 
compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - 
raise NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers 
= { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
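The Makefile hunk below replaces these checked-in stubs with a `protogen` target that regenerates backend_pb2.py and backend_pb2_grpc.py at build time via grpc_tools.protoc. For reference, the same generation step can be driven from Python; a minimal sketch, assuming grpcio-tools is installed and backend.proto sits at the -I../.. include path used by the new rule:

# Sketch: programmatic equivalent of the new `protogen` Makefile target.
# Assumes grpcio-tools is installed; paths mirror the rule added below.
from grpc_tools import protoc

ret = protoc.main([
    "grpc_tools.protoc",
    "-I../..",              # directory holding backend.proto
    "--python_out=.",       # writes backend_pb2.py
    "--grpc_python_out=.",  # writes backend_pb2_grpc.py
    "backend.proto",
])
if ret != 0:
    raise RuntimeError("protoc failed with exit code %d" % ret)

Running this (or `make protogen`) in a backend directory recreates exactly the files the patch deletes, so nothing is lost by dropping them from the tree.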
diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile index 8f34f559..d7a80e55 100644 --- a/backend/python/vall-e-x/Makefile +++ b/backend/python/vall-e-x/Makefile @@ -3,18 +3,28 @@ export SKIP_CONDA=1 endif .PHONY: ttsvalle -ttsvalle: +ttsvalle: protogen $(MAKE) -C ../common-env/transformers bash install.sh .PHONY: run -run: +run: protogen @echo "Running ttsvalle..." bash run.sh @echo "ttsvalle run." .PHONY: test -test: +test: protogen @echo "Testing valle..." bash test.sh @echo "valle tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/vall-e-x/backend_pb2.py b/backend/python/vall-e-x/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/vall-e-x/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 
\x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/vall-e-x/backend_pb2_grpc.py b/backend/python/vall-e-x/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/vall-e-x/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler 
plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
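Because the deleted files are exactly what grpc_tools.protoc regenerates, callers of the stubs are unaffected by this change. A minimal usage sketch of the BackendStub API shown in the deleted code above, with a hypothetical address and port:

# Sketch: exercising the regenerated client stubs against a running backend.
# The endpoint is hypothetical; service and message types come from backend.proto.
import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:  # hypothetical endpoint
    stub = backend_pb2_grpc.BackendStub(channel)
    reply = stub.Health(backend_pb2.HealthMessage())  # returns backend.Reply
    print(reply.message)  # Reply.message is bytes per the descriptor above

The vllm backend below receives the same treatment: its Makefile gains the identical `protogen`/`protogen-clean` targets, and its generated files are deleted.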
diff --git a/backend/python/vllm/Makefile b/backend/python/vllm/Makefile index 9ee5886d..3e1fdd77 100644 --- a/backend/python/vllm/Makefile +++ b/backend/python/vllm/Makefile @@ -1,15 +1,25 @@ .PHONY: vllm -vllm: +vllm: protogen $(MAKE) -C ../common-env/transformers .PHONY: run -run: +run: protogen @echo "Running vllm..." bash run.sh @echo "vllm run." .PHONY: test -test: +test: protogen @echo "Testing vllm..." bash test.sh - @echo "vllm tested." \ No newline at end of file + @echo "vllm tested." + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/vllm/backend_pb2.py b/backend/python/vllm/backend_pb2.py deleted file mode 100644 index 24b6de3b..00000000 --- a/backend/python/vllm/backend_pb2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: backend.proto -# Protobuf Python Version: 4.25.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x1b\n\tStoresKey\x12\x0e\n\x06\x46loats\x18\x01 \x03(\x02\"\x1c\n\x0bStoresValue\x12\r\n\x05\x42ytes\x18\x01 \x01(\x0c\"Z\n\x10StoresSetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"7\n\x13StoresDeleteOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"4\n\x10StoresGetOptions\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\"Y\n\x0fStoresGetResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\"B\n\x11StoresFindOptions\x12\x1f\n\x03Key\x18\x01 \x01(\x0b\x32\x12.backend.StoresKey\x12\x0c\n\x04TopK\x18\x02 \x01(\x05\"p\n\x10StoresFindResult\x12 \n\x04Keys\x18\x01 \x03(\x0b\x32\x12.backend.StoresKey\x12$\n\x06Values\x18\x02 \x03(\x0b\x32\x14.backend.StoresValue\x12\x14\n\x0cSimilarities\x18\x03 \x03(\x02\"\x0f\n\rHealthMessage\"\xe8\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 
\x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\x12\x0e\n\x06NDraft\x18) \x01(\x05\x12\x0e\n\x06Images\x18* \x03(\t\x12\x1c\n\x14UseTokenizerTemplate\x18+ \x01(\x08\x12\"\n\x08Messages\x18, \x03(\x0b\x32\x10.backend.Message\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xb0\x08\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\x12\x14\n\x0cPipelineType\x18\x1a \x01(\t\x12\x15\n\rSchedulerType\x18\x1b \x01(\t\x12\x0c\n\x04\x43UDA\x18\x1c \x01(\x08\x12\x10\n\x08\x43\x46GScale\x18\x1d \x01(\x02\x12\x0f\n\x07IMG2IMG\x18\x1e \x01(\x08\x12\x11\n\tCLIPModel\x18\x1f \x01(\t\x12\x15\n\rCLIPSubfolder\x18 \x01(\t\x12\x10\n\x08\x43LIPSkip\x18! \x01(\x05\x12\x12\n\nControlNet\x18\x30 \x01(\t\x12\x11\n\tTokenizer\x18\" \x01(\t\x12\x10\n\x08LoraBase\x18# \x01(\t\x12\x13\n\x0bLoraAdapter\x18$ \x01(\t\x12\x11\n\tLoraScale\x18* \x01(\x02\x12\x11\n\tNoMulMatQ\x18% \x01(\x08\x12\x12\n\nDraftModel\x18\' \x01(\t\x12\x11\n\tAudioPath\x18& \x01(\t\x12\x14\n\x0cQuantization\x18( \x01(\t\x12\x1c\n\x14GPUMemoryUtilization\x18\x32 \x01(\x02\x12\x17\n\x0fTrustRemoteCode\x18\x33 \x01(\x08\x12\x14\n\x0c\x45nforceEager\x18\x34 \x01(\x08\x12\x11\n\tSwapSpace\x18\x35 \x01(\x05\x12\x13\n\x0bMaxModelLen\x18\x36 \x01(\x05\x12\x0e\n\x06MMProj\x18) \x01(\t\x12\x13\n\x0bRopeScaling\x18+ \x01(\t\x12\x15\n\rYarnExtFactor\x18, \x01(\x02\x12\x16\n\x0eYarnAttnFactor\x18- \x01(\x02\x12\x14\n\x0cYarnBetaFast\x18. 
\x01(\x02\x12\x14\n\x0cYarnBetaSlow\x18/ \x01(\x02\x12\x0c\n\x04Type\x18\x31 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\xd7\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\x12\x0b\n\x03src\x18\t \x01(\t\x12\x18\n\x10\x45nableParameters\x18\n \x01(\t\x12\x10\n\x08\x43LIPSkip\x18\x0b \x01(\x05\"E\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t\x12\r\n\x05voice\x18\x04 \x01(\t\"6\n\x14TokenizationResponse\x12\x0e\n\x06length\x18\x01 \x01(\x05\x12\x0e\n\x06tokens\x18\x02 \x03(\x05\"\x8e\x01\n\x0fMemoryUsageData\x12\r\n\x05total\x18\x01 \x01(\x04\x12:\n\tbreakdown\x18\x02 \x03(\x0b\x32\'.backend.MemoryUsageData.BreakdownEntry\x1a\x30\n\x0e\x42reakdownEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x04:\x02\x38\x01\"\xad\x01\n\x0eStatusResponse\x12,\n\x05state\x18\x01 \x01(\x0e\x32\x1d.backend.StatusResponse.State\x12(\n\x06memory\x18\x02 \x01(\x0b\x32\x18.backend.MemoryUsageData\"C\n\x05State\x12\x11\n\rUNINITIALIZED\x10\x00\x12\x08\n\x04\x42USY\x10\x01\x12\t\n\x05READY\x10\x02\x12\x12\n\x05\x45RROR\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\"(\n\x07Message\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t2\xfb\x06\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x12J\n\x0eTokenizeString\x12\x17.backend.PredictOptions\x1a\x1d.backend.TokenizationResponse\"\x00\x12;\n\x06Status\x12\x16.backend.HealthMessage\x1a\x17.backend.StatusResponse\"\x00\x12\x39\n\tStoresSet\x12\x19.backend.StoresSetOptions\x1a\x0f.backend.Result\"\x00\x12?\n\x0cStoresDelete\x12\x1c.backend.StoresDeleteOptions\x1a\x0f.backend.Result\"\x00\x12\x42\n\tStoresGet\x12\x19.backend.StoresGetOptions\x1a\x18.backend.StoresGetResult\"\x00\x12\x45\n\nStoresFind\x12\x1a.backend.StoresFindOptions\x1a\x19.backend.StoresFindResult\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') - -_globals = globals() 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._options = None - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_options = b'8\001' - _globals['_STORESKEY']._serialized_start=26 - _globals['_STORESKEY']._serialized_end=53 - _globals['_STORESVALUE']._serialized_start=55 - _globals['_STORESVALUE']._serialized_end=83 - _globals['_STORESSETOPTIONS']._serialized_start=85 - _globals['_STORESSETOPTIONS']._serialized_end=175 - _globals['_STORESDELETEOPTIONS']._serialized_start=177 - _globals['_STORESDELETEOPTIONS']._serialized_end=232 - _globals['_STORESGETOPTIONS']._serialized_start=234 - _globals['_STORESGETOPTIONS']._serialized_end=286 - _globals['_STORESGETRESULT']._serialized_start=288 - _globals['_STORESGETRESULT']._serialized_end=377 - _globals['_STORESFINDOPTIONS']._serialized_start=379 - _globals['_STORESFINDOPTIONS']._serialized_end=445 - _globals['_STORESFINDRESULT']._serialized_start=447 - _globals['_STORESFINDRESULT']._serialized_end=559 - _globals['_HEALTHMESSAGE']._serialized_start=561 - _globals['_HEALTHMESSAGE']._serialized_end=576 - _globals['_PREDICTOPTIONS']._serialized_start=579 - _globals['_PREDICTOPTIONS']._serialized_end=1451 - _globals['_REPLY']._serialized_start=1453 - _globals['_REPLY']._serialized_end=1477 - _globals['_MODELOPTIONS']._serialized_start=1480 - _globals['_MODELOPTIONS']._serialized_end=2552 - _globals['_RESULT']._serialized_start=2554 - _globals['_RESULT']._serialized_end=2596 - _globals['_EMBEDDINGRESULT']._serialized_start=2598 - _globals['_EMBEDDINGRESULT']._serialized_end=2635 - _globals['_TRANSCRIPTREQUEST']._serialized_start=2637 - _globals['_TRANSCRIPTREQUEST']._serialized_end=2704 - _globals['_TRANSCRIPTRESULT']._serialized_start=2706 - _globals['_TRANSCRIPTRESULT']._serialized_end=2784 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=2786 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=2875 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=2878 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=3093 - _globals['_TTSREQUEST']._serialized_start=3095 - _globals['_TTSREQUEST']._serialized_end=3164 - _globals['_TOKENIZATIONRESPONSE']._serialized_start=3166 - _globals['_TOKENIZATIONRESPONSE']._serialized_end=3220 - _globals['_MEMORYUSAGEDATA']._serialized_start=3223 - _globals['_MEMORYUSAGEDATA']._serialized_end=3365 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_start=3317 - _globals['_MEMORYUSAGEDATA_BREAKDOWNENTRY']._serialized_end=3365 - _globals['_STATUSRESPONSE']._serialized_start=3368 - _globals['_STATUSRESPONSE']._serialized_end=3541 - _globals['_STATUSRESPONSE_STATE']._serialized_start=3474 - _globals['_STATUSRESPONSE_STATE']._serialized_end=3541 - _globals['_MESSAGE']._serialized_start=3543 - _globals['_MESSAGE']._serialized_end=3583 - _globals['_BACKEND']._serialized_start=3586 - _globals['_BACKEND']._serialized_end=4477 -# @@protoc_insertion_point(module_scope) diff --git a/backend/python/vllm/backend_pb2_grpc.py b/backend/python/vllm/backend_pb2_grpc.py deleted file mode 100644 index e06fccf3..00000000 --- a/backend/python/vllm/backend_pb2_grpc.py +++ /dev/null @@ -1,495 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. 
DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -import backend_pb2 as backend__pb2 - - -class BackendStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/backend.Backend/Health', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Predict = channel.unary_unary( - '/backend.Backend/Predict', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.LoadModel = channel.unary_unary( - '/backend.Backend/LoadModel', - request_serializer=backend__pb2.ModelOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.PredictStream = channel.unary_stream( - '/backend.Backend/PredictStream', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.Reply.FromString, - ) - self.Embedding = channel.unary_unary( - '/backend.Backend/Embedding', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.EmbeddingResult.FromString, - ) - self.GenerateImage = channel.unary_unary( - '/backend.Backend/GenerateImage', - request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.AudioTranscription = channel.unary_unary( - '/backend.Backend/AudioTranscription', - request_serializer=backend__pb2.TranscriptRequest.SerializeToString, - response_deserializer=backend__pb2.TranscriptResult.FromString, - ) - self.TTS = channel.unary_unary( - '/backend.Backend/TTS', - request_serializer=backend__pb2.TTSRequest.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.TokenizeString = channel.unary_unary( - '/backend.Backend/TokenizeString', - request_serializer=backend__pb2.PredictOptions.SerializeToString, - response_deserializer=backend__pb2.TokenizationResponse.FromString, - ) - self.Status = channel.unary_unary( - '/backend.Backend/Status', - request_serializer=backend__pb2.HealthMessage.SerializeToString, - response_deserializer=backend__pb2.StatusResponse.FromString, - ) - self.StoresSet = channel.unary_unary( - '/backend.Backend/StoresSet', - request_serializer=backend__pb2.StoresSetOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresDelete = channel.unary_unary( - '/backend.Backend/StoresDelete', - request_serializer=backend__pb2.StoresDeleteOptions.SerializeToString, - response_deserializer=backend__pb2.Result.FromString, - ) - self.StoresGet = channel.unary_unary( - '/backend.Backend/StoresGet', - request_serializer=backend__pb2.StoresGetOptions.SerializeToString, - response_deserializer=backend__pb2.StoresGetResult.FromString, - ) - self.StoresFind = channel.unary_unary( - '/backend.Backend/StoresFind', - request_serializer=backend__pb2.StoresFindOptions.SerializeToString, - response_deserializer=backend__pb2.StoresFindResult.FromString, - ) - - -class BackendServicer(object): - """Missing associated documentation comment in .proto file.""" - - def Health(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise 
NotImplementedError('Method not implemented!') - - def Predict(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def LoadModel(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PredictStream(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Embedding(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateImage(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def AudioTranscription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TTS(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TokenizeString(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def Status(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresSet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresDelete(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresGet(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def StoresFind(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_BackendServicer_to_server(servicer, server): - rpc_method_handlers = { - 
'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Predict': grpc.unary_unary_rpc_method_handler( - servicer.Predict, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'LoadModel': grpc.unary_unary_rpc_method_handler( - servicer.LoadModel, - request_deserializer=backend__pb2.ModelOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'PredictStream': grpc.unary_stream_rpc_method_handler( - servicer.PredictStream, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.Reply.SerializeToString, - ), - 'Embedding': grpc.unary_unary_rpc_method_handler( - servicer.Embedding, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.EmbeddingResult.SerializeToString, - ), - 'GenerateImage': grpc.unary_unary_rpc_method_handler( - servicer.GenerateImage, - request_deserializer=backend__pb2.GenerateImageRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'AudioTranscription': grpc.unary_unary_rpc_method_handler( - servicer.AudioTranscription, - request_deserializer=backend__pb2.TranscriptRequest.FromString, - response_serializer=backend__pb2.TranscriptResult.SerializeToString, - ), - 'TTS': grpc.unary_unary_rpc_method_handler( - servicer.TTS, - request_deserializer=backend__pb2.TTSRequest.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'TokenizeString': grpc.unary_unary_rpc_method_handler( - servicer.TokenizeString, - request_deserializer=backend__pb2.PredictOptions.FromString, - response_serializer=backend__pb2.TokenizationResponse.SerializeToString, - ), - 'Status': grpc.unary_unary_rpc_method_handler( - servicer.Status, - request_deserializer=backend__pb2.HealthMessage.FromString, - response_serializer=backend__pb2.StatusResponse.SerializeToString, - ), - 'StoresSet': grpc.unary_unary_rpc_method_handler( - servicer.StoresSet, - request_deserializer=backend__pb2.StoresSetOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresDelete': grpc.unary_unary_rpc_method_handler( - servicer.StoresDelete, - request_deserializer=backend__pb2.StoresDeleteOptions.FromString, - response_serializer=backend__pb2.Result.SerializeToString, - ), - 'StoresGet': grpc.unary_unary_rpc_method_handler( - servicer.StoresGet, - request_deserializer=backend__pb2.StoresGetOptions.FromString, - response_serializer=backend__pb2.StoresGetResult.SerializeToString, - ), - 'StoresFind': grpc.unary_unary_rpc_method_handler( - servicer.StoresFind, - request_deserializer=backend__pb2.StoresFindOptions.FromString, - response_serializer=backend__pb2.StoresFindResult.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'backend.Backend', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - - - # This class is part of an EXPERIMENTAL API. 
-class Backend(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Predict(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def LoadModel(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', - backend__pb2.ModelOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def PredictStream(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.Reply.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Embedding(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.EmbeddingResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def GenerateImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', - backend__pb2.GenerateImageRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def AudioTranscription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', - backend__pb2.TranscriptRequest.SerializeToString, - backend__pb2.TranscriptResult.FromString, - options, channel_credentials, - insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TTS(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', - backend__pb2.TTSRequest.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def TokenizeString(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString', - backend__pb2.PredictOptions.SerializeToString, - backend__pb2.TokenizationResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def Status(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status', - backend__pb2.HealthMessage.SerializeToString, - backend__pb2.StatusResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresSet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresSet', - backend__pb2.StoresSetOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresDelete(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresDelete', - backend__pb2.StoresDeleteOptions.SerializeToString, - backend__pb2.Result.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresGet(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresGet', - backend__pb2.StoresGetOptions.SerializeToString, - backend__pb2.StoresGetResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def StoresFind(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/backend.Backend/StoresFind', - backend__pb2.StoresFindOptions.SerializeToString, - backend__pb2.StoresFindResult.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 
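The generated `backend_pb2_grpc` code removed above defines both halves of LocalAI's backend protocol: a backend subclasses `BackendServicer`, overrides the RPCs it supports (anything left alone keeps the generated `UNIMPLEMENTED` behavior), and registers itself with `add_BackendServicer_to_server`; callers drive it through the stub class generated in the same module. A minimal sketch of that wiring, assuming `backend.proto` has been compiled into importable `backend_pb2`/`backend_pb2_grpc` modules; the `EchoBackend` class and the port are illustrative, not part of this patch:

```python
# Sketch only: backend_pb2/backend_pb2_grpc are the modules generated from
# backend.proto (e.g. via `python -m grpc_tools.protoc -I. --python_out=.
# --grpc_python_out=. backend.proto`); EchoBackend and the port are examples.
from concurrent import futures

import grpc
import backend_pb2
import backend_pb2_grpc


class EchoBackend(backend_pb2_grpc.BackendServicer):
    # Override only Health; every other RPC keeps the generated
    # UNIMPLEMENTED stub shown in the deleted module above.
    def Health(self, request, context):
        return backend_pb2.Reply(message=b"OK")  # Reply.message is bytes


def serve(address="localhost:50051"):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    backend_pb2_grpc.add_BackendServicer_to_server(EchoBackend(), server)
    server.add_insecure_port(address)
    server.start()
    return server


if __name__ == "__main__":
    server = serve()
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        print(stub.Health(backend_pb2.HealthMessage()).message)  # b"OK"
    server.stop(None)
```

Since this patch removes the checked-in generated sources (the Python bindings above and the Go `backend.pb.go` below), they are presumably regenerated from `backend.proto` at build time; that matches the build documentation change that follows, which adds `grpcio-tools` (providing `grpc_tools.protoc`) and the `protoc-gen-go`/`protoc-gen-go-grpc` plugins to the dependency lists.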
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 8ceaf1f5..a4db135e 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -36,14 +36,28 @@ To install the dependencies follow the instructions below: Install `xcode` from the App Store ```bash -brew install abseil cmake go grpc protobuf wget +brew install abseil cmake go grpc protobuf protoc-gen-go protoc-gen-go-grpc python wget +``` + +After installing the above dependencies, you need to install `grpcio-tools` from PyPI. You can do this with `pip install --user` or inside a virtualenv. + +```bash +pip install --user grpcio-tools ``` {{% /tab %}} {{% tab tabName="Debian" %}} ```bash -apt install golang protobuf-compiler-grpc libgrpc-dev make cmake +apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-tools +``` + +After you have Go installed and working, you can install the binaries required for compiling the Go protobuf components via the following commands: + +```bash +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + ``` {{% /tab %}} diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go deleted file mode 100644 index e9afe196..00000000 --- a/pkg/grpc/proto/backend.pb.go +++ /dev/null @@ -1,2934 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.26.0 -// protoc v5.26.1 -// source: backend.proto - -package proto - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type StatusResponse_State int32 - -const ( - StatusResponse_UNINITIALIZED StatusResponse_State = 0 - StatusResponse_BUSY StatusResponse_State = 1 - StatusResponse_READY StatusResponse_State = 2 - StatusResponse_ERROR StatusResponse_State = -1 -) - -// Enum value maps for StatusResponse_State. -var ( - StatusResponse_State_name = map[int32]string{ - 0: "UNINITIALIZED", - 1: "BUSY", - 2: "READY", - -1: "ERROR", - } - StatusResponse_State_value = map[string]int32{ - "UNINITIALIZED": 0, - "BUSY": 1, - "READY": 2, - "ERROR": -1, - } -) - -func (x StatusResponse_State) Enum() *StatusResponse_State { - p := new(StatusResponse_State) - *p = x - return p -} - -func (x StatusResponse_State) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (StatusResponse_State) Descriptor() protoreflect.EnumDescriptor { - return file_backend_proto_enumTypes[0].Descriptor() -} - -func (StatusResponse_State) Type() protoreflect.EnumType { - return &file_backend_proto_enumTypes[0] -} - -func (x StatusResponse_State) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Use StatusResponse_State.Descriptor instead.
-func (StatusResponse_State) EnumDescriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{21, 0} -} - -type StoresKey struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Floats []float32 `protobuf:"fixed32,1,rep,packed,name=Floats,proto3" json:"Floats,omitempty"` -} - -func (x *StoresKey) Reset() { - *x = StoresKey{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresKey) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresKey) ProtoMessage() {} - -func (x *StoresKey) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresKey.ProtoReflect.Descriptor instead. -func (*StoresKey) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{0} -} - -func (x *StoresKey) GetFloats() []float32 { - if x != nil { - return x.Floats - } - return nil -} - -type StoresValue struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Bytes []byte `protobuf:"bytes,1,opt,name=Bytes,proto3" json:"Bytes,omitempty"` -} - -func (x *StoresValue) Reset() { - *x = StoresValue{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresValue) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresValue) ProtoMessage() {} - -func (x *StoresValue) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresValue.ProtoReflect.Descriptor instead. -func (*StoresValue) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{1} -} - -func (x *StoresValue) GetBytes() []byte { - if x != nil { - return x.Bytes - } - return nil -} - -type StoresSetOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` -} - -func (x *StoresSetOptions) Reset() { - *x = StoresSetOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresSetOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresSetOptions) ProtoMessage() {} - -func (x *StoresSetOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresSetOptions.ProtoReflect.Descriptor instead. 
-func (*StoresSetOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{2} -} - -func (x *StoresSetOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresSetOptions) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -type StoresDeleteOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` -} - -func (x *StoresDeleteOptions) Reset() { - *x = StoresDeleteOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresDeleteOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresDeleteOptions) ProtoMessage() {} - -func (x *StoresDeleteOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresDeleteOptions.ProtoReflect.Descriptor instead. -func (*StoresDeleteOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{3} -} - -func (x *StoresDeleteOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -type StoresGetOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` -} - -func (x *StoresGetOptions) Reset() { - *x = StoresGetOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresGetOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresGetOptions) ProtoMessage() {} - -func (x *StoresGetOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresGetOptions.ProtoReflect.Descriptor instead. 
-func (*StoresGetOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{4} -} - -func (x *StoresGetOptions) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -type StoresGetResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` -} - -func (x *StoresGetResult) Reset() { - *x = StoresGetResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresGetResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresGetResult) ProtoMessage() {} - -func (x *StoresGetResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresGetResult.ProtoReflect.Descriptor instead. -func (*StoresGetResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{5} -} - -func (x *StoresGetResult) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresGetResult) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -type StoresFindOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Key *StoresKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"` - TopK int32 `protobuf:"varint,2,opt,name=TopK,proto3" json:"TopK,omitempty"` -} - -func (x *StoresFindOptions) Reset() { - *x = StoresFindOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresFindOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresFindOptions) ProtoMessage() {} - -func (x *StoresFindOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresFindOptions.ProtoReflect.Descriptor instead. 
-func (*StoresFindOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{6} -} - -func (x *StoresFindOptions) GetKey() *StoresKey { - if x != nil { - return x.Key - } - return nil -} - -func (x *StoresFindOptions) GetTopK() int32 { - if x != nil { - return x.TopK - } - return 0 -} - -type StoresFindResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys []*StoresKey `protobuf:"bytes,1,rep,name=Keys,proto3" json:"Keys,omitempty"` - Values []*StoresValue `protobuf:"bytes,2,rep,name=Values,proto3" json:"Values,omitempty"` - Similarities []float32 `protobuf:"fixed32,3,rep,packed,name=Similarities,proto3" json:"Similarities,omitempty"` -} - -func (x *StoresFindResult) Reset() { - *x = StoresFindResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StoresFindResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StoresFindResult) ProtoMessage() {} - -func (x *StoresFindResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StoresFindResult.ProtoReflect.Descriptor instead. -func (*StoresFindResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{7} -} - -func (x *StoresFindResult) GetKeys() []*StoresKey { - if x != nil { - return x.Keys - } - return nil -} - -func (x *StoresFindResult) GetValues() []*StoresValue { - if x != nil { - return x.Values - } - return nil -} - -func (x *StoresFindResult) GetSimilarities() []float32 { - if x != nil { - return x.Similarities - } - return nil -} - -type HealthMessage struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields -} - -func (x *HealthMessage) Reset() { - *x = HealthMessage{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *HealthMessage) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*HealthMessage) ProtoMessage() {} - -func (x *HealthMessage) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead. -func (*HealthMessage) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{8} -} - -// The request message containing the user's name. 
-type PredictOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` - Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` - Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` - Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` - TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` - Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` - Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` - NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` - Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` - Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` - F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` - DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` - StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` - IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` - TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` - TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` - FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` - PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` - Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` - MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` - MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` - PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` - LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` - MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` - PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` - PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` - Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` - MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` - PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` - Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` - EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"` - Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,37,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale 
float32 `protobuf:"fixed32,38,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"` - NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"` - NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"` - Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"` - UseTokenizerTemplate bool `protobuf:"varint,43,opt,name=UseTokenizerTemplate,proto3" json:"UseTokenizerTemplate,omitempty"` - Messages []*Message `protobuf:"bytes,44,rep,name=Messages,proto3" json:"Messages,omitempty"` -} - -func (x *PredictOptions) Reset() { - *x = PredictOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *PredictOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PredictOptions) ProtoMessage() {} - -func (x *PredictOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[9] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead. -func (*PredictOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{9} -} - -func (x *PredictOptions) GetPrompt() string { - if x != nil { - return x.Prompt - } - return "" -} - -func (x *PredictOptions) GetSeed() int32 { - if x != nil { - return x.Seed - } - return 0 -} - -func (x *PredictOptions) GetThreads() int32 { - if x != nil { - return x.Threads - } - return 0 -} - -func (x *PredictOptions) GetTokens() int32 { - if x != nil { - return x.Tokens - } - return 0 -} - -func (x *PredictOptions) GetTopK() int32 { - if x != nil { - return x.TopK - } - return 0 -} - -func (x *PredictOptions) GetRepeat() int32 { - if x != nil { - return x.Repeat - } - return 0 -} - -func (x *PredictOptions) GetBatch() int32 { - if x != nil { - return x.Batch - } - return 0 -} - -func (x *PredictOptions) GetNKeep() int32 { - if x != nil { - return x.NKeep - } - return 0 -} - -func (x *PredictOptions) GetTemperature() float32 { - if x != nil { - return x.Temperature - } - return 0 -} - -func (x *PredictOptions) GetPenalty() float32 { - if x != nil { - return x.Penalty - } - return 0 -} - -func (x *PredictOptions) GetF16KV() bool { - if x != nil { - return x.F16KV - } - return false -} - -func (x *PredictOptions) GetDebugMode() bool { - if x != nil { - return x.DebugMode - } - return false -} - -func (x *PredictOptions) GetStopPrompts() []string { - if x != nil { - return x.StopPrompts - } - return nil -} - -func (x *PredictOptions) GetIgnoreEOS() bool { - if x != nil { - return x.IgnoreEOS - } - return false -} - -func (x *PredictOptions) GetTailFreeSamplingZ() float32 { - if x != nil { - return x.TailFreeSamplingZ - } - return 0 -} - -func (x *PredictOptions) GetTypicalP() float32 { - if x != nil { - return x.TypicalP - } - return 0 -} - -func (x *PredictOptions) GetFrequencyPenalty() float32 { - if x != nil { - return x.FrequencyPenalty - } - return 0 -} - -func (x *PredictOptions) GetPresencePenalty() float32 { - if x != nil { - return x.PresencePenalty - } - return 0 -} - -func (x *PredictOptions) GetMirostat() int32 { - if x != nil 
{ - return x.Mirostat - } - return 0 -} - -func (x *PredictOptions) GetMirostatETA() float32 { - if x != nil { - return x.MirostatETA - } - return 0 -} - -func (x *PredictOptions) GetMirostatTAU() float32 { - if x != nil { - return x.MirostatTAU - } - return 0 -} - -func (x *PredictOptions) GetPenalizeNL() bool { - if x != nil { - return x.PenalizeNL - } - return false -} - -func (x *PredictOptions) GetLogitBias() string { - if x != nil { - return x.LogitBias - } - return "" -} - -func (x *PredictOptions) GetMLock() bool { - if x != nil { - return x.MLock - } - return false -} - -func (x *PredictOptions) GetMMap() bool { - if x != nil { - return x.MMap - } - return false -} - -func (x *PredictOptions) GetPromptCacheAll() bool { - if x != nil { - return x.PromptCacheAll - } - return false -} - -func (x *PredictOptions) GetPromptCacheRO() bool { - if x != nil { - return x.PromptCacheRO - } - return false -} - -func (x *PredictOptions) GetGrammar() string { - if x != nil { - return x.Grammar - } - return "" -} - -func (x *PredictOptions) GetMainGPU() string { - if x != nil { - return x.MainGPU - } - return "" -} - -func (x *PredictOptions) GetTensorSplit() string { - if x != nil { - return x.TensorSplit - } - return "" -} - -func (x *PredictOptions) GetTopP() float32 { - if x != nil { - return x.TopP - } - return 0 -} - -func (x *PredictOptions) GetPromptCachePath() string { - if x != nil { - return x.PromptCachePath - } - return "" -} - -func (x *PredictOptions) GetDebug() bool { - if x != nil { - return x.Debug - } - return false -} - -func (x *PredictOptions) GetEmbeddingTokens() []int32 { - if x != nil { - return x.EmbeddingTokens - } - return nil -} - -func (x *PredictOptions) GetEmbeddings() string { - if x != nil { - return x.Embeddings - } - return "" -} - -func (x *PredictOptions) GetRopeFreqBase() float32 { - if x != nil { - return x.RopeFreqBase - } - return 0 -} - -func (x *PredictOptions) GetRopeFreqScale() float32 { - if x != nil { - return x.RopeFreqScale - } - return 0 -} - -func (x *PredictOptions) GetNegativePromptScale() float32 { - if x != nil { - return x.NegativePromptScale - } - return 0 -} - -func (x *PredictOptions) GetNegativePrompt() string { - if x != nil { - return x.NegativePrompt - } - return "" -} - -func (x *PredictOptions) GetNDraft() int32 { - if x != nil { - return x.NDraft - } - return 0 -} - -func (x *PredictOptions) GetImages() []string { - if x != nil { - return x.Images - } - return nil -} - -func (x *PredictOptions) GetUseTokenizerTemplate() bool { - if x != nil { - return x.UseTokenizerTemplate - } - return false -} - -func (x *PredictOptions) GetMessages() []*Message { - if x != nil { - return x.Messages - } - return nil -} - -// The response message containing the result -type Reply struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Message []byte `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` -} - -func (x *Reply) Reset() { - *x = Reply{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Reply) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Reply) ProtoMessage() {} - -func (x *Reply) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[10] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - 
ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Reply.ProtoReflect.Descriptor instead. -func (*Reply) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{10} -} - -func (x *Reply) GetMessage() []byte { - if x != nil { - return x.Message - } - return nil -} - -type ModelOptions struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` - ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` - Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` - NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` - F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` - MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` - VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` - LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` - Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` - NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` - NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` - MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` - Threads int32 `protobuf:"varint,15,opt,name=Threads,proto3" json:"Threads,omitempty"` - LibrarySearchPath string `protobuf:"bytes,16,opt,name=LibrarySearchPath,proto3" json:"LibrarySearchPath,omitempty"` - RopeFreqBase float32 `protobuf:"fixed32,17,opt,name=RopeFreqBase,proto3" json:"RopeFreqBase,omitempty"` - RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` - RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"` - NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"` - ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"` - // AutoGPTQ - Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"` - UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"` - ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"` - UseFastTokenizer bool `protobuf:"varint,25,opt,name=UseFastTokenizer,proto3" json:"UseFastTokenizer,omitempty"` - // Diffusers - PipelineType string `protobuf:"bytes,26,opt,name=PipelineType,proto3" json:"PipelineType,omitempty"` - SchedulerType string `protobuf:"bytes,27,opt,name=SchedulerType,proto3" json:"SchedulerType,omitempty"` - CUDA bool `protobuf:"varint,28,opt,name=CUDA,proto3" json:"CUDA,omitempty"` - CFGScale float32 `protobuf:"fixed32,29,opt,name=CFGScale,proto3" json:"CFGScale,omitempty"` - IMG2IMG bool `protobuf:"varint,30,opt,name=IMG2IMG,proto3" json:"IMG2IMG,omitempty"` - CLIPModel string `protobuf:"bytes,31,opt,name=CLIPModel,proto3" json:"CLIPModel,omitempty"` - CLIPSubfolder string `protobuf:"bytes,32,opt,name=CLIPSubfolder,proto3" json:"CLIPSubfolder,omitempty"` - CLIPSkip int32 `protobuf:"varint,33,opt,name=CLIPSkip,proto3" 
json:"CLIPSkip,omitempty"` - ControlNet string `protobuf:"bytes,48,opt,name=ControlNet,proto3" json:"ControlNet,omitempty"` - Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"` - // LLM (llama.cpp) - LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"` - LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"` - LoraScale float32 `protobuf:"fixed32,42,opt,name=LoraScale,proto3" json:"LoraScale,omitempty"` - NoMulMatQ bool `protobuf:"varint,37,opt,name=NoMulMatQ,proto3" json:"NoMulMatQ,omitempty"` - DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"` - AudioPath string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"` - // vllm - Quantization string `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"` - GPUMemoryUtilization float32 `protobuf:"fixed32,50,opt,name=GPUMemoryUtilization,proto3" json:"GPUMemoryUtilization,omitempty"` - TrustRemoteCode bool `protobuf:"varint,51,opt,name=TrustRemoteCode,proto3" json:"TrustRemoteCode,omitempty"` - EnforceEager bool `protobuf:"varint,52,opt,name=EnforceEager,proto3" json:"EnforceEager,omitempty"` - SwapSpace int32 `protobuf:"varint,53,opt,name=SwapSpace,proto3" json:"SwapSpace,omitempty"` - MaxModelLen int32 `protobuf:"varint,54,opt,name=MaxModelLen,proto3" json:"MaxModelLen,omitempty"` - MMProj string `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"` - RopeScaling string `protobuf:"bytes,43,opt,name=RopeScaling,proto3" json:"RopeScaling,omitempty"` - YarnExtFactor float32 `protobuf:"fixed32,44,opt,name=YarnExtFactor,proto3" json:"YarnExtFactor,omitempty"` - YarnAttnFactor float32 `protobuf:"fixed32,45,opt,name=YarnAttnFactor,proto3" json:"YarnAttnFactor,omitempty"` - YarnBetaFast float32 `protobuf:"fixed32,46,opt,name=YarnBetaFast,proto3" json:"YarnBetaFast,omitempty"` - YarnBetaSlow float32 `protobuf:"fixed32,47,opt,name=YarnBetaSlow,proto3" json:"YarnBetaSlow,omitempty"` - Type string `protobuf:"bytes,49,opt,name=Type,proto3" json:"Type,omitempty"` -} - -func (x *ModelOptions) Reset() { - *x = ModelOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[11] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *ModelOptions) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*ModelOptions) ProtoMessage() {} - -func (x *ModelOptions) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[11] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead. 
-func (*ModelOptions) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{11} -} - -func (x *ModelOptions) GetModel() string { - if x != nil { - return x.Model - } - return "" -} - -func (x *ModelOptions) GetContextSize() int32 { - if x != nil { - return x.ContextSize - } - return 0 -} - -func (x *ModelOptions) GetSeed() int32 { - if x != nil { - return x.Seed - } - return 0 -} - -func (x *ModelOptions) GetNBatch() int32 { - if x != nil { - return x.NBatch - } - return 0 -} - -func (x *ModelOptions) GetF16Memory() bool { - if x != nil { - return x.F16Memory - } - return false -} - -func (x *ModelOptions) GetMLock() bool { - if x != nil { - return x.MLock - } - return false -} - -func (x *ModelOptions) GetMMap() bool { - if x != nil { - return x.MMap - } - return false -} - -func (x *ModelOptions) GetVocabOnly() bool { - if x != nil { - return x.VocabOnly - } - return false -} - -func (x *ModelOptions) GetLowVRAM() bool { - if x != nil { - return x.LowVRAM - } - return false -} - -func (x *ModelOptions) GetEmbeddings() bool { - if x != nil { - return x.Embeddings - } - return false -} - -func (x *ModelOptions) GetNUMA() bool { - if x != nil { - return x.NUMA - } - return false -} - -func (x *ModelOptions) GetNGPULayers() int32 { - if x != nil { - return x.NGPULayers - } - return 0 -} - -func (x *ModelOptions) GetMainGPU() string { - if x != nil { - return x.MainGPU - } - return "" -} - -func (x *ModelOptions) GetTensorSplit() string { - if x != nil { - return x.TensorSplit - } - return "" -} - -func (x *ModelOptions) GetThreads() int32 { - if x != nil { - return x.Threads - } - return 0 -} - -func (x *ModelOptions) GetLibrarySearchPath() string { - if x != nil { - return x.LibrarySearchPath - } - return "" -} - -func (x *ModelOptions) GetRopeFreqBase() float32 { - if x != nil { - return x.RopeFreqBase - } - return 0 -} - -func (x *ModelOptions) GetRopeFreqScale() float32 { - if x != nil { - return x.RopeFreqScale - } - return 0 -} - -func (x *ModelOptions) GetRMSNormEps() float32 { - if x != nil { - return x.RMSNormEps - } - return 0 -} - -func (x *ModelOptions) GetNGQA() int32 { - if x != nil { - return x.NGQA - } - return 0 -} - -func (x *ModelOptions) GetModelFile() string { - if x != nil { - return x.ModelFile - } - return "" -} - -func (x *ModelOptions) GetDevice() string { - if x != nil { - return x.Device - } - return "" -} - -func (x *ModelOptions) GetUseTriton() bool { - if x != nil { - return x.UseTriton - } - return false -} - -func (x *ModelOptions) GetModelBaseName() string { - if x != nil { - return x.ModelBaseName - } - return "" -} - -func (x *ModelOptions) GetUseFastTokenizer() bool { - if x != nil { - return x.UseFastTokenizer - } - return false -} - -func (x *ModelOptions) GetPipelineType() string { - if x != nil { - return x.PipelineType - } - return "" -} - -func (x *ModelOptions) GetSchedulerType() string { - if x != nil { - return x.SchedulerType - } - return "" -} - -func (x *ModelOptions) GetCUDA() bool { - if x != nil { - return x.CUDA - } - return false -} - -func (x *ModelOptions) GetCFGScale() float32 { - if x != nil { - return x.CFGScale - } - return 0 -} - -func (x *ModelOptions) GetIMG2IMG() bool { - if x != nil { - return x.IMG2IMG - } - return false -} - -func (x *ModelOptions) GetCLIPModel() string { - if x != nil { - return x.CLIPModel - } - return "" -} - -func (x *ModelOptions) GetCLIPSubfolder() string { - if x != nil { - return x.CLIPSubfolder - } - return "" -} - -func (x *ModelOptions) GetCLIPSkip() int32 { - if x != nil 
{ - return x.CLIPSkip - } - return 0 -} - -func (x *ModelOptions) GetControlNet() string { - if x != nil { - return x.ControlNet - } - return "" -} - -func (x *ModelOptions) GetTokenizer() string { - if x != nil { - return x.Tokenizer - } - return "" -} - -func (x *ModelOptions) GetLoraBase() string { - if x != nil { - return x.LoraBase - } - return "" -} - -func (x *ModelOptions) GetLoraAdapter() string { - if x != nil { - return x.LoraAdapter - } - return "" -} - -func (x *ModelOptions) GetLoraScale() float32 { - if x != nil { - return x.LoraScale - } - return 0 -} - -func (x *ModelOptions) GetNoMulMatQ() bool { - if x != nil { - return x.NoMulMatQ - } - return false -} - -func (x *ModelOptions) GetDraftModel() string { - if x != nil { - return x.DraftModel - } - return "" -} - -func (x *ModelOptions) GetAudioPath() string { - if x != nil { - return x.AudioPath - } - return "" -} - -func (x *ModelOptions) GetQuantization() string { - if x != nil { - return x.Quantization - } - return "" -} - -func (x *ModelOptions) GetGPUMemoryUtilization() float32 { - if x != nil { - return x.GPUMemoryUtilization - } - return 0 -} - -func (x *ModelOptions) GetTrustRemoteCode() bool { - if x != nil { - return x.TrustRemoteCode - } - return false -} - -func (x *ModelOptions) GetEnforceEager() bool { - if x != nil { - return x.EnforceEager - } - return false -} - -func (x *ModelOptions) GetSwapSpace() int32 { - if x != nil { - return x.SwapSpace - } - return 0 -} - -func (x *ModelOptions) GetMaxModelLen() int32 { - if x != nil { - return x.MaxModelLen - } - return 0 -} - -func (x *ModelOptions) GetMMProj() string { - if x != nil { - return x.MMProj - } - return "" -} - -func (x *ModelOptions) GetRopeScaling() string { - if x != nil { - return x.RopeScaling - } - return "" -} - -func (x *ModelOptions) GetYarnExtFactor() float32 { - if x != nil { - return x.YarnExtFactor - } - return 0 -} - -func (x *ModelOptions) GetYarnAttnFactor() float32 { - if x != nil { - return x.YarnAttnFactor - } - return 0 -} - -func (x *ModelOptions) GetYarnBetaFast() float32 { - if x != nil { - return x.YarnBetaFast - } - return 0 -} - -func (x *ModelOptions) GetYarnBetaSlow() float32 { - if x != nil { - return x.YarnBetaSlow - } - return 0 -} - -func (x *ModelOptions) GetType() string { - if x != nil { - return x.Type - } - return "" -} - -type Result struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` - Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"` -} - -func (x *Result) Reset() { - *x = Result{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[12] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Result) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Result) ProtoMessage() {} - -func (x *Result) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[12] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Result.ProtoReflect.Descriptor instead. 
-func (*Result) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{12} -} - -func (x *Result) GetMessage() string { - if x != nil { - return x.Message - } - return "" -} - -func (x *Result) GetSuccess() bool { - if x != nil { - return x.Success - } - return false -} - -type EmbeddingResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Embeddings []float32 `protobuf:"fixed32,1,rep,packed,name=embeddings,proto3" json:"embeddings,omitempty"` -} - -func (x *EmbeddingResult) Reset() { - *x = EmbeddingResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *EmbeddingResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*EmbeddingResult) ProtoMessage() {} - -func (x *EmbeddingResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead. -func (*EmbeddingResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{13} -} - -func (x *EmbeddingResult) GetEmbeddings() []float32 { - if x != nil { - return x.Embeddings - } - return nil -} - -type TranscriptRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Dst string `protobuf:"bytes,2,opt,name=dst,proto3" json:"dst,omitempty"` - Language string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"` - Threads uint32 `protobuf:"varint,4,opt,name=threads,proto3" json:"threads,omitempty"` -} - -func (x *TranscriptRequest) Reset() { - *x = TranscriptRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[14] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptRequest) ProtoMessage() {} - -func (x *TranscriptRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[14] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptRequest.ProtoReflect.Descriptor instead. 
-func (*TranscriptRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{14} -} - -func (x *TranscriptRequest) GetDst() string { - if x != nil { - return x.Dst - } - return "" -} - -func (x *TranscriptRequest) GetLanguage() string { - if x != nil { - return x.Language - } - return "" -} - -func (x *TranscriptRequest) GetThreads() uint32 { - if x != nil { - return x.Threads - } - return 0 -} - -type TranscriptResult struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Segments []*TranscriptSegment `protobuf:"bytes,1,rep,name=segments,proto3" json:"segments,omitempty"` - Text string `protobuf:"bytes,2,opt,name=text,proto3" json:"text,omitempty"` -} - -func (x *TranscriptResult) Reset() { - *x = TranscriptResult{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[15] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptResult) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptResult) ProtoMessage() {} - -func (x *TranscriptResult) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[15] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptResult.ProtoReflect.Descriptor instead. -func (*TranscriptResult) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{15} -} - -func (x *TranscriptResult) GetSegments() []*TranscriptSegment { - if x != nil { - return x.Segments - } - return nil -} - -func (x *TranscriptResult) GetText() string { - if x != nil { - return x.Text - } - return "" -} - -type TranscriptSegment struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Id int32 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` - Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"` - End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"` - Text string `protobuf:"bytes,4,opt,name=text,proto3" json:"text,omitempty"` - Tokens []int32 `protobuf:"varint,5,rep,packed,name=tokens,proto3" json:"tokens,omitempty"` -} - -func (x *TranscriptSegment) Reset() { - *x = TranscriptSegment{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[16] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TranscriptSegment) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TranscriptSegment) ProtoMessage() {} - -func (x *TranscriptSegment) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[16] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TranscriptSegment.ProtoReflect.Descriptor instead. 
-func (*TranscriptSegment) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{16} -} - -func (x *TranscriptSegment) GetId() int32 { - if x != nil { - return x.Id - } - return 0 -} - -func (x *TranscriptSegment) GetStart() int64 { - if x != nil { - return x.Start - } - return 0 -} - -func (x *TranscriptSegment) GetEnd() int64 { - if x != nil { - return x.End - } - return 0 -} - -func (x *TranscriptSegment) GetText() string { - if x != nil { - return x.Text - } - return "" -} - -func (x *TranscriptSegment) GetTokens() []int32 { - if x != nil { - return x.Tokens - } - return nil -} - -type GenerateImageRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Height int32 `protobuf:"varint,1,opt,name=height,proto3" json:"height,omitempty"` - Width int32 `protobuf:"varint,2,opt,name=width,proto3" json:"width,omitempty"` - Mode int32 `protobuf:"varint,3,opt,name=mode,proto3" json:"mode,omitempty"` - Step int32 `protobuf:"varint,4,opt,name=step,proto3" json:"step,omitempty"` - Seed int32 `protobuf:"varint,5,opt,name=seed,proto3" json:"seed,omitempty"` - PositivePrompt string `protobuf:"bytes,6,opt,name=positive_prompt,json=positivePrompt,proto3" json:"positive_prompt,omitempty"` - NegativePrompt string `protobuf:"bytes,7,opt,name=negative_prompt,json=negativePrompt,proto3" json:"negative_prompt,omitempty"` - Dst string `protobuf:"bytes,8,opt,name=dst,proto3" json:"dst,omitempty"` - Src string `protobuf:"bytes,9,opt,name=src,proto3" json:"src,omitempty"` - // Diffusers - EnableParameters string `protobuf:"bytes,10,opt,name=EnableParameters,proto3" json:"EnableParameters,omitempty"` - CLIPSkip int32 `protobuf:"varint,11,opt,name=CLIPSkip,proto3" json:"CLIPSkip,omitempty"` -} - -func (x *GenerateImageRequest) Reset() { - *x = GenerateImageRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[17] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *GenerateImageRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*GenerateImageRequest) ProtoMessage() {} - -func (x *GenerateImageRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[17] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use GenerateImageRequest.ProtoReflect.Descriptor instead. 
-func (*GenerateImageRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{17} -} - -func (x *GenerateImageRequest) GetHeight() int32 { - if x != nil { - return x.Height - } - return 0 -} - -func (x *GenerateImageRequest) GetWidth() int32 { - if x != nil { - return x.Width - } - return 0 -} - -func (x *GenerateImageRequest) GetMode() int32 { - if x != nil { - return x.Mode - } - return 0 -} - -func (x *GenerateImageRequest) GetStep() int32 { - if x != nil { - return x.Step - } - return 0 -} - -func (x *GenerateImageRequest) GetSeed() int32 { - if x != nil { - return x.Seed - } - return 0 -} - -func (x *GenerateImageRequest) GetPositivePrompt() string { - if x != nil { - return x.PositivePrompt - } - return "" -} - -func (x *GenerateImageRequest) GetNegativePrompt() string { - if x != nil { - return x.NegativePrompt - } - return "" -} - -func (x *GenerateImageRequest) GetDst() string { - if x != nil { - return x.Dst - } - return "" -} - -func (x *GenerateImageRequest) GetSrc() string { - if x != nil { - return x.Src - } - return "" -} - -func (x *GenerateImageRequest) GetEnableParameters() string { - if x != nil { - return x.EnableParameters - } - return "" -} - -func (x *GenerateImageRequest) GetCLIPSkip() int32 { - if x != nil { - return x.CLIPSkip - } - return 0 -} - -type TTSRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"` - Model string `protobuf:"bytes,2,opt,name=model,proto3" json:"model,omitempty"` - Dst string `protobuf:"bytes,3,opt,name=dst,proto3" json:"dst,omitempty"` - Voice string `protobuf:"bytes,4,opt,name=voice,proto3" json:"voice,omitempty"` -} - -func (x *TTSRequest) Reset() { - *x = TTSRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[18] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TTSRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TTSRequest) ProtoMessage() {} - -func (x *TTSRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[18] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TTSRequest.ProtoReflect.Descriptor instead. 
-func (*TTSRequest) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{18} -} - -func (x *TTSRequest) GetText() string { - if x != nil { - return x.Text - } - return "" -} - -func (x *TTSRequest) GetModel() string { - if x != nil { - return x.Model - } - return "" -} - -func (x *TTSRequest) GetDst() string { - if x != nil { - return x.Dst - } - return "" -} - -func (x *TTSRequest) GetVoice() string { - if x != nil { - return x.Voice - } - return "" -} - -type TokenizationResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Length int32 `protobuf:"varint,1,opt,name=length,proto3" json:"length,omitempty"` - Tokens []int32 `protobuf:"varint,2,rep,packed,name=tokens,proto3" json:"tokens,omitempty"` -} - -func (x *TokenizationResponse) Reset() { - *x = TokenizationResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[19] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *TokenizationResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*TokenizationResponse) ProtoMessage() {} - -func (x *TokenizationResponse) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[19] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use TokenizationResponse.ProtoReflect.Descriptor instead. -func (*TokenizationResponse) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{19} -} - -func (x *TokenizationResponse) GetLength() int32 { - if x != nil { - return x.Length - } - return 0 -} - -func (x *TokenizationResponse) GetTokens() []int32 { - if x != nil { - return x.Tokens - } - return nil -} - -type MemoryUsageData struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Total uint64 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"` - Breakdown map[string]uint64 `protobuf:"bytes,2,rep,name=breakdown,proto3" json:"breakdown,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"` -} - -func (x *MemoryUsageData) Reset() { - *x = MemoryUsageData{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[20] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *MemoryUsageData) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*MemoryUsageData) ProtoMessage() {} - -func (x *MemoryUsageData) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[20] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use MemoryUsageData.ProtoReflect.Descriptor instead. 
-func (*MemoryUsageData) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{20} -} - -func (x *MemoryUsageData) GetTotal() uint64 { - if x != nil { - return x.Total - } - return 0 -} - -func (x *MemoryUsageData) GetBreakdown() map[string]uint64 { - if x != nil { - return x.Breakdown - } - return nil -} - -type StatusResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - State StatusResponse_State `protobuf:"varint,1,opt,name=state,proto3,enum=backend.StatusResponse_State" json:"state,omitempty"` - Memory *MemoryUsageData `protobuf:"bytes,2,opt,name=memory,proto3" json:"memory,omitempty"` -} - -func (x *StatusResponse) Reset() { - *x = StatusResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[21] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *StatusResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StatusResponse) ProtoMessage() {} - -func (x *StatusResponse) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[21] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StatusResponse.ProtoReflect.Descriptor instead. -func (*StatusResponse) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{21} -} - -func (x *StatusResponse) GetState() StatusResponse_State { - if x != nil { - return x.State - } - return StatusResponse_UNINITIALIZED -} - -func (x *StatusResponse) GetMemory() *MemoryUsageData { - if x != nil { - return x.Memory - } - return nil -} - -type Message struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"` - Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"` -} - -func (x *Message) Reset() { - *x = Message{} - if protoimpl.UnsafeEnabled { - mi := &file_backend_proto_msgTypes[22] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Message) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Message) ProtoMessage() {} - -func (x *Message) ProtoReflect() protoreflect.Message { - mi := &file_backend_proto_msgTypes[22] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Message.ProtoReflect.Descriptor instead. 
-func (*Message) Descriptor() ([]byte, []int) { - return file_backend_proto_rawDescGZIP(), []int{22} -} - -func (x *Message) GetRole() string { - if x != nil { - return x.Role - } - return "" -} - -func (x *Message) GetContent() string { - if x != nil { - return x.Content - } - return "" -} - -var File_backend_proto protoreflect.FileDescriptor - -var file_backend_proto_rawDesc = []byte{ - 0x0a, 0x0d, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x07, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x23, 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x73, 0x22, 0x23, 0x0a, - 0x0b, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05, - 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x42, 0x79, 0x74, - 0x65, 0x73, 0x22, 0x68, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, - 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, - 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, - 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0x3d, 0x0a, 0x13, - 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x3a, 0x0a, 0x10, 0x53, - 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, - 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x22, 0x67, 0x0a, 0x0f, 0x53, 0x74, 0x6f, 0x72, 0x65, - 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, - 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, - 0x22, 0x4d, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x54, - 0x6f, 0x70, 0x4b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x22, - 0x8c, 0x01, 0x0a, 0x10, 
0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x12, 0x26, 0x0a, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x4b, 0x65, 0x79, 0x52, 0x04, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x2c, 0x0a, 0x06, - 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x52, 0x06, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x53, 0x69, - 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x02, - 0x52, 0x0c, 0x53, 0x69, 0x6d, 0x69, 0x6c, 0x61, 0x72, 0x69, 0x74, 0x69, 0x65, 0x73, 0x22, 0x0f, - 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, - 0xd6, 0x0a, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, - 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, - 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, - 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, - 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, - 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, - 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, - 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, - 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, - 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, - 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, - 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, - 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, - 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, - 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, - 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, - 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, - 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 
0x0a, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, - 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, - 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, - 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, - 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, - 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, - 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, - 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, - 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, - 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, - 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, - 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, - 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, - 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, - 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, - 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, - 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, - 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, - 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, - 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, - 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, - 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, - 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, - 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, - 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, - 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, - 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01, - 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, - 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12, 0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, - 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 
0x23, 0x20, 0x03, 0x28, - 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, - 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, - 0x73, 0x65, 0x18, 0x25, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, - 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, - 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x30, 0x0a, 0x13, - 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, - 0x61, 0x6c, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x02, 0x52, 0x13, 0x4e, 0x65, 0x67, 0x61, 0x74, - 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x26, - 0x0a, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, - 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, - 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, - 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x2b, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x55, 0x73, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x2c, 0x0a, 0x08, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x18, 0x2c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x08, - 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, - 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x0d, 0x0a, 0x0c, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, - 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, - 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, - 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, - 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, - 
0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, - 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, - 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, - 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, - 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, - 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, - 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, - 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, - 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, - 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, - 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, - 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, - 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, - 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, - 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, - 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, - 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, - 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, - 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, - 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, - 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, - 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, - 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, - 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, - 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, - 0x50, 0x69, 0x70, 0x65, 
0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, - 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, - 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, - 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, - 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, - 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, - 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, - 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, - 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, - 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, - 0x12, 0x1e, 0x0a, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, 0x18, 0x30, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x4e, 0x65, 0x74, - 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, - 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, - 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, - 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x02, 0x52, - 0x09, 0x4c, 0x6f, 0x72, 0x61, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, - 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, - 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, - 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, - 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, - 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, - 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, - 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, - 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x14, 0x47, 0x50, - 0x55, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x32, 0x20, 0x01, 0x28, 0x02, 0x52, 0x14, 0x47, 0x50, 0x55, 0x4d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x55, 0x74, 0x69, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, - 0x0a, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 
0x65, 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, - 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x54, 0x72, 0x75, 0x73, 0x74, 0x52, 0x65, - 0x6d, 0x6f, 0x74, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x45, 0x6e, 0x66, 0x6f, - 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, - 0x45, 0x6e, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x45, 0x61, 0x67, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, - 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x09, 0x53, 0x77, 0x61, 0x70, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4d, 0x61, - 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x18, 0x36, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x0b, 0x4d, 0x61, 0x78, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4c, 0x65, 0x6e, 0x12, 0x16, 0x0a, 0x06, - 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, - 0x50, 0x72, 0x6f, 0x6a, 0x12, 0x20, 0x0a, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, 0x63, 0x61, 0x6c, - 0x69, 0x6e, 0x67, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x52, 0x6f, 0x70, 0x65, 0x53, - 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0d, 0x59, 0x61, 0x72, 0x6e, 0x45, 0x78, - 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x59, - 0x61, 0x72, 0x6e, 0x45, 0x78, 0x74, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x26, 0x0a, 0x0e, - 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x2d, - 0x20, 0x01, 0x28, 0x02, 0x52, 0x0e, 0x59, 0x61, 0x72, 0x6e, 0x41, 0x74, 0x74, 0x6e, 0x46, 0x61, - 0x63, 0x74, 0x6f, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, - 0x46, 0x61, 0x73, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x59, 0x61, 0x72, 0x6e, - 0x42, 0x65, 0x74, 0x61, 0x46, 0x61, 0x73, 0x74, 0x12, 0x22, 0x0a, 0x0c, 0x59, 0x61, 0x72, 0x6e, - 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, - 0x59, 0x61, 0x72, 0x6e, 0x42, 0x65, 0x74, 0x61, 0x53, 0x6c, 0x6f, 0x77, 0x12, 0x12, 0x0a, 0x04, - 0x54, 0x79, 0x70, 0x65, 0x18, 0x31, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, - 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, - 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, - 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, - 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, - 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, - 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 
0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, - 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, - 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, - 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, - 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, - 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, - 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, - 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xbe, 0x02, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, - 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, - 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, - 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, - 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, - 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, - 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, - 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, - 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x72, 0x63, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x72, 0x63, - 0x12, 0x2a, 0x0a, 0x10, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, - 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x45, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x12, 0x1a, 0x0a, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, - 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x22, 0x5e, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, - 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, - 
0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, - 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x6f, 0x6b, 0x65, - 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, - 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, - 0x22, 0xac, 0x01, 0x0a, 0x0f, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, - 0x44, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x45, 0x0a, 0x09, 0x62, 0x72, - 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, - 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, - 0x6e, 0x1a, 0x3c, 0x0a, 0x0e, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, - 0xbc, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x30, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x44, 0x61, 0x74, - 0x61, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x43, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x41, 0x4c, 0x49, - 0x5a, 0x45, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x01, 0x12, - 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x05, 0x45, 0x52, - 0x52, 0x4f, 0x52, 0x10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x22, 0x37, - 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, - 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, - 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x32, 0xfb, 0x06, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 
0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, - 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, - 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, - 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, - 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, - 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, - 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, - 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, - 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, - 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, - 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4a, 0x0a, 0x0e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, - 0x65, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x1a, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, - 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, - 0x61, 0x67, 0x65, 0x1a, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 
0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39, - 0x0a, 0x09, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x53, 0x65, 0x74, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, - 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3f, 0x0a, 0x0c, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x62, 0x61, 0x63, 0x6b, - 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x42, 0x0a, 0x09, 0x53, 0x74, - 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, - 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x65, 0x73, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x45, - 0x0a, 0x0a, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x12, 0x1a, 0x2e, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, - 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x73, 0x46, 0x69, 0x6e, 0x64, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, - 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, - 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, - 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, -} - -var ( - file_backend_proto_rawDescOnce sync.Once - file_backend_proto_rawDescData = file_backend_proto_rawDesc -) - -func file_backend_proto_rawDescGZIP() []byte { - file_backend_proto_rawDescOnce.Do(func() { - file_backend_proto_rawDescData = protoimpl.X.CompressGZIP(file_backend_proto_rawDescData) - }) - return file_backend_proto_rawDescData -} - -var file_backend_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_backend_proto_msgTypes = make([]protoimpl.MessageInfo, 24) -var file_backend_proto_goTypes = []interface{}{ - (StatusResponse_State)(0), // 0: backend.StatusResponse.State - (*StoresKey)(nil), // 1: backend.StoresKey - (*StoresValue)(nil), // 2: backend.StoresValue - (*StoresSetOptions)(nil), // 3: backend.StoresSetOptions - (*StoresDeleteOptions)(nil), // 4: backend.StoresDeleteOptions - (*StoresGetOptions)(nil), // 5: backend.StoresGetOptions - (*StoresGetResult)(nil), // 6: backend.StoresGetResult - (*StoresFindOptions)(nil), // 7: backend.StoresFindOptions - (*StoresFindResult)(nil), // 8: backend.StoresFindResult - (*HealthMessage)(nil), // 9: backend.HealthMessage - (*PredictOptions)(nil), // 10: backend.PredictOptions - (*Reply)(nil), // 11: backend.Reply - (*ModelOptions)(nil), // 12: backend.ModelOptions - (*Result)(nil), // 13: 
backend.Result - (*EmbeddingResult)(nil), // 14: backend.EmbeddingResult - (*TranscriptRequest)(nil), // 15: backend.TranscriptRequest - (*TranscriptResult)(nil), // 16: backend.TranscriptResult - (*TranscriptSegment)(nil), // 17: backend.TranscriptSegment - (*GenerateImageRequest)(nil), // 18: backend.GenerateImageRequest - (*TTSRequest)(nil), // 19: backend.TTSRequest - (*TokenizationResponse)(nil), // 20: backend.TokenizationResponse - (*MemoryUsageData)(nil), // 21: backend.MemoryUsageData - (*StatusResponse)(nil), // 22: backend.StatusResponse - (*Message)(nil), // 23: backend.Message - nil, // 24: backend.MemoryUsageData.BreakdownEntry -} -var file_backend_proto_depIdxs = []int32{ - 1, // 0: backend.StoresSetOptions.Keys:type_name -> backend.StoresKey - 2, // 1: backend.StoresSetOptions.Values:type_name -> backend.StoresValue - 1, // 2: backend.StoresDeleteOptions.Keys:type_name -> backend.StoresKey - 1, // 3: backend.StoresGetOptions.Keys:type_name -> backend.StoresKey - 1, // 4: backend.StoresGetResult.Keys:type_name -> backend.StoresKey - 2, // 5: backend.StoresGetResult.Values:type_name -> backend.StoresValue - 1, // 6: backend.StoresFindOptions.Key:type_name -> backend.StoresKey - 1, // 7: backend.StoresFindResult.Keys:type_name -> backend.StoresKey - 2, // 8: backend.StoresFindResult.Values:type_name -> backend.StoresValue - 23, // 9: backend.PredictOptions.Messages:type_name -> backend.Message - 17, // 10: backend.TranscriptResult.segments:type_name -> backend.TranscriptSegment - 24, // 11: backend.MemoryUsageData.breakdown:type_name -> backend.MemoryUsageData.BreakdownEntry - 0, // 12: backend.StatusResponse.state:type_name -> backend.StatusResponse.State - 21, // 13: backend.StatusResponse.memory:type_name -> backend.MemoryUsageData - 9, // 14: backend.Backend.Health:input_type -> backend.HealthMessage - 10, // 15: backend.Backend.Predict:input_type -> backend.PredictOptions - 12, // 16: backend.Backend.LoadModel:input_type -> backend.ModelOptions - 10, // 17: backend.Backend.PredictStream:input_type -> backend.PredictOptions - 10, // 18: backend.Backend.Embedding:input_type -> backend.PredictOptions - 18, // 19: backend.Backend.GenerateImage:input_type -> backend.GenerateImageRequest - 15, // 20: backend.Backend.AudioTranscription:input_type -> backend.TranscriptRequest - 19, // 21: backend.Backend.TTS:input_type -> backend.TTSRequest - 10, // 22: backend.Backend.TokenizeString:input_type -> backend.PredictOptions - 9, // 23: backend.Backend.Status:input_type -> backend.HealthMessage - 3, // 24: backend.Backend.StoresSet:input_type -> backend.StoresSetOptions - 4, // 25: backend.Backend.StoresDelete:input_type -> backend.StoresDeleteOptions - 5, // 26: backend.Backend.StoresGet:input_type -> backend.StoresGetOptions - 7, // 27: backend.Backend.StoresFind:input_type -> backend.StoresFindOptions - 11, // 28: backend.Backend.Health:output_type -> backend.Reply - 11, // 29: backend.Backend.Predict:output_type -> backend.Reply - 13, // 30: backend.Backend.LoadModel:output_type -> backend.Result - 11, // 31: backend.Backend.PredictStream:output_type -> backend.Reply - 14, // 32: backend.Backend.Embedding:output_type -> backend.EmbeddingResult - 13, // 33: backend.Backend.GenerateImage:output_type -> backend.Result - 16, // 34: backend.Backend.AudioTranscription:output_type -> backend.TranscriptResult - 13, // 35: backend.Backend.TTS:output_type -> backend.Result - 20, // 36: backend.Backend.TokenizeString:output_type -> backend.TokenizationResponse - 22, // 37: 
backend.Backend.Status:output_type -> backend.StatusResponse - 13, // 38: backend.Backend.StoresSet:output_type -> backend.Result - 13, // 39: backend.Backend.StoresDelete:output_type -> backend.Result - 6, // 40: backend.Backend.StoresGet:output_type -> backend.StoresGetResult - 8, // 41: backend.Backend.StoresFind:output_type -> backend.StoresFindResult - 28, // [28:42] is the sub-list for method output_type - 14, // [14:28] is the sub-list for method input_type - 14, // [14:14] is the sub-list for extension type_name - 14, // [14:14] is the sub-list for extension extendee - 0, // [0:14] is the sub-list for field type_name -} - -func init() { file_backend_proto_init() } -func file_backend_proto_init() { - if File_backend_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_backend_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresKey); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresValue); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresSetOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresDeleteOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresGetOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresGetResult); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresFindOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StoresFindResult); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*HealthMessage); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PredictOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Reply); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil 
- } - } - file_backend_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ModelOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Result); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EmbeddingResult); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptResult); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TranscriptSegment); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GenerateImageRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TTSRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*TokenizationResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MemoryUsageData); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*StatusResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_backend_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Message); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_backend_proto_rawDesc, - NumEnums: 1, - NumMessages: 24, - NumExtensions: 0, - NumServices: 1, - }, - GoTypes: file_backend_proto_goTypes, - DependencyIndexes: file_backend_proto_depIdxs, - EnumInfos: file_backend_proto_enumTypes, - MessageInfos: file_backend_proto_msgTypes, - }.Build() - File_backend_proto = out.File - file_backend_proto_rawDesc = nil 
- file_backend_proto_goTypes = nil - file_backend_proto_depIdxs = nil -} diff --git a/pkg/grpc/proto/backend_grpc.pb.go b/pkg/grpc/proto/backend_grpc.pb.go deleted file mode 100644 index a1f442e0..00000000 --- a/pkg/grpc/proto/backend_grpc.pb.go +++ /dev/null @@ -1,618 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v5.26.1 -// source: backend.proto - -package proto - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -const ( - Backend_Health_FullMethodName = "/backend.Backend/Health" - Backend_Predict_FullMethodName = "/backend.Backend/Predict" - Backend_LoadModel_FullMethodName = "/backend.Backend/LoadModel" - Backend_PredictStream_FullMethodName = "/backend.Backend/PredictStream" - Backend_Embedding_FullMethodName = "/backend.Backend/Embedding" - Backend_GenerateImage_FullMethodName = "/backend.Backend/GenerateImage" - Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription" - Backend_TTS_FullMethodName = "/backend.Backend/TTS" - Backend_TokenizeString_FullMethodName = "/backend.Backend/TokenizeString" - Backend_Status_FullMethodName = "/backend.Backend/Status" - Backend_StoresSet_FullMethodName = "/backend.Backend/StoresSet" - Backend_StoresDelete_FullMethodName = "/backend.Backend/StoresDelete" - Backend_StoresGet_FullMethodName = "/backend.Backend/StoresGet" - Backend_StoresFind_FullMethodName = "/backend.Backend/StoresFind" -) - -// BackendClient is the client API for Backend service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
-type BackendClient interface { - Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) - Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) - LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) - PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) - Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) - GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) - AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) - TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) - TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) - Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) - StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) - StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) - StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) - StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) -} - -type backendClient struct { - cc grpc.ClientConnInterface -} - -func NewBackendClient(cc grpc.ClientConnInterface) BackendClient { - return &backendClient{cc} -} - -func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { - out := new(Reply) - err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...) - if err != nil { - return nil, err - } - x := &backendPredictStreamClient{stream} - if err := x.ClientStream.SendMsg(in); err != nil { - return nil, err - } - if err := x.ClientStream.CloseSend(); err != nil { - return nil, err - } - return x, nil -} - -type Backend_PredictStreamClient interface { - Recv() (*Reply, error) - grpc.ClientStream -} - -type backendPredictStreamClient struct { - grpc.ClientStream -} - -func (x *backendPredictStreamClient) Recv() (*Reply, error) { - m := new(Reply) - if err := x.ClientStream.RecvMsg(m); err != nil { - return nil, err - } - return m, nil -} - -func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) { - out := new(EmbeddingResult) - err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...) 
- if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) { - out := new(TranscriptResult) - err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) { - out := new(TokenizationResponse) - err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) { - out := new(StatusResponse) - err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) { - out := new(Result) - err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) { - out := new(StoresGetResult) - err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) { - out := new(StoresFindResult) - err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// BackendServer is the server API for Backend service. 
-// All implementations must embed UnimplementedBackendServer -// for forward compatibility -type BackendServer interface { - Health(context.Context, *HealthMessage) (*Reply, error) - Predict(context.Context, *PredictOptions) (*Reply, error) - LoadModel(context.Context, *ModelOptions) (*Result, error) - PredictStream(*PredictOptions, Backend_PredictStreamServer) error - Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) - GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) - AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) - TTS(context.Context, *TTSRequest) (*Result, error) - TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) - Status(context.Context, *HealthMessage) (*StatusResponse, error) - StoresSet(context.Context, *StoresSetOptions) (*Result, error) - StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) - StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) - StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) - mustEmbedUnimplementedBackendServer() -} - -// UnimplementedBackendServer must be embedded to have forward compatible implementations. -type UnimplementedBackendServer struct { -} - -func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") -} -func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) { - return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") -} -func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") -} -func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error { - return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") -} -func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented") -} -func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented") -} -func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented") -} -func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented") -} -func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented") -} -func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") -} -func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented") -} -func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not 
implemented") -} -func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented") -} -func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) { - return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented") -} -func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {} - -// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to BackendServer will -// result in compilation errors. -type UnsafeBackendServer interface { - mustEmbedUnimplementedBackendServer() -} - -func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) { - s.RegisterService(&Backend_ServiceDesc, srv) -} - -func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Health(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Health_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Health(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Predict(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Predict_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Predict(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ModelOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).LoadModel(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_LoadModel_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { - m := new(PredictOptions) - if err := stream.RecvMsg(m); err != nil { - return err - } - return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream}) -} - -type Backend_PredictStreamServer interface { - Send(*Reply) error - grpc.ServerStream -} - -type backendPredictStreamServer struct { - grpc.ServerStream -} - -func (x *backendPredictStreamServer) Send(m *Reply) error { - return x.ServerStream.SendMsg(m) -} - -func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if 
interceptor == nil { - return srv.(BackendServer).Embedding(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Embedding_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(GenerateImageRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).GenerateImage(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_GenerateImage_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TranscriptRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).AudioTranscription(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_AudioTranscription_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(TTSRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TTS(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_TTS_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TTS(ctx, req.(*TTSRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(PredictOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).TokenizeString(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_TokenizeString_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(HealthMessage) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).Status(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_Status_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).Status(ctx, req.(*HealthMessage)) - } - return interceptor(ctx, in, info, handler) -} - -func 
_Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresSetOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresSet(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresSet_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresDeleteOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresDelete(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresDelete_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresGetOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresGet(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresGet_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions)) - } - return interceptor(ctx, in, info, handler) -} - -func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(StoresFindOptions) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(BackendServer).StoresFind(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: Backend_StoresFind_FullMethodName, - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions)) - } - return interceptor(ctx, in, info, handler) -} - -// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service. 
-// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var Backend_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "backend.Backend", - HandlerType: (*BackendServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Health", - Handler: _Backend_Health_Handler, - }, - { - MethodName: "Predict", - Handler: _Backend_Predict_Handler, - }, - { - MethodName: "LoadModel", - Handler: _Backend_LoadModel_Handler, - }, - { - MethodName: "Embedding", - Handler: _Backend_Embedding_Handler, - }, - { - MethodName: "GenerateImage", - Handler: _Backend_GenerateImage_Handler, - }, - { - MethodName: "AudioTranscription", - Handler: _Backend_AudioTranscription_Handler, - }, - { - MethodName: "TTS", - Handler: _Backend_TTS_Handler, - }, - { - MethodName: "TokenizeString", - Handler: _Backend_TokenizeString_Handler, - }, - { - MethodName: "Status", - Handler: _Backend_Status_Handler, - }, - { - MethodName: "StoresSet", - Handler: _Backend_StoresSet_Handler, - }, - { - MethodName: "StoresDelete", - Handler: _Backend_StoresDelete_Handler, - }, - { - MethodName: "StoresGet", - Handler: _Backend_StoresGet_Handler, - }, - { - MethodName: "StoresFind", - Handler: _Backend_StoresFind_Handler, - }, - }, - Streams: []grpc.StreamDesc{ - { - StreamName: "PredictStream", - Handler: _Backend_PredictStream_Handler, - ServerStreams: true, - }, - }, - Metadata: "backend.proto", -} From eed5706994a3e770a0194cad9d1cfd724ba1b10a Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 13 Apr 2024 03:45:34 -0400 Subject: [PATCH 0044/2648] refactor: backend/service split, channel-based llm flow (#1963) Refactor: channel based llm flow and services split --------- Signed-off-by: Dave Lee --- .github/workflows/test.yml | 15 +- Makefile | 18 +- backend/go/transcribe/transcript.go | 6 +- backend/go/transcribe/whisper.go | 2 +- core/backend/embeddings.go | 90 +- core/backend/image.go | 261 +++++- core/backend/llm.go | 271 ++++-- core/backend/options.go | 84 +- core/backend/transcript.go | 41 +- core/backend/tts.go | 77 +- core/cli/run.go | 8 +- core/cli/transcript.go | 19 +- core/cli/tts.go | 26 +- core/config/backend_config.go | 301 +------ core/config/backend_config_loader.go | 509 +++++++++++ core/config/exports_test.go | 6 + core/http/api.go | 209 +++-- core/http/api_test.go | 98 ++- core/http/ctx/fiber.go | 65 +- core/http/endpoints/elevenlabs/tts.go | 39 +- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/localai/tts.go | 39 +- core/http/endpoints/openai/assistant.go | 2 +- core/http/endpoints/openai/chat.go | 621 ++------------ core/http/endpoints/openai/completion.go | 163 +--- core/http/endpoints/openai/edit.go | 78 +- core/http/endpoints/openai/embeddings.go | 65 +- core/http/endpoints/openai/image.go | 218 +---- core/http/endpoints/openai/inference.go | 55 -- core/http/endpoints/openai/list.go | 52 +- core/http/endpoints/openai/request.go | 285 ------- core/http/endpoints/openai/transcription.go | 28 +- core/schema/{whisper.go => transcription.go} | 2 +- core/services/backend_monitor.go | 30 +- core/services/gallery.go | 116 ++- core/services/list_models.go | 72 ++ .../services}/model_preload_test.go | 5 +- core/services/openai.go | 805 ++++++++++++++++++ core/startup/startup.go | 91 +- core/state.go | 41 + .../llm text/-completions Stream.bru | 25 + pkg/concurrency/concurrency.go | 135 +++ pkg/concurrency/concurrency_test.go | 101 +++ pkg/concurrency/types.go | 6 + pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- 
pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/model/initializers.go | 8 +- pkg/startup/model_preload.go | 85 -- pkg/utils/base64.go | 50 ++ 52 files changed, 3064 insertions(+), 2279 deletions(-) create mode 100644 core/config/backend_config_loader.go create mode 100644 core/config/exports_test.go delete mode 100644 core/http/endpoints/openai/inference.go delete mode 100644 core/http/endpoints/openai/request.go rename core/schema/{whisper.go => transcription.go} (90%) create mode 100644 core/services/list_models.go rename {pkg/startup => core/services}/model_preload_test.go (96%) create mode 100644 core/services/openai.go create mode 100644 core/state.go create mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru create mode 100644 pkg/concurrency/concurrency.go create mode 100644 pkg/concurrency/concurrency_test.go create mode 100644 pkg/concurrency/types.go delete mode 100644 pkg/startup/model_preload.go create mode 100644 pkg/utils/base64.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46c4e065..29bd3e08 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,8 +121,9 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 tests-aio-container: runs-on: ubuntu-latest @@ -173,8 +174,9 @@ jobs: make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 tests-apple: runs-on: macOS-14 @@ -207,5 +209,6 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 \ No newline at end of file + uses: dave-gray101/action-tmate@master + with: + connect-timeout-seconds: 180 \ No newline at end of file diff --git a/Makefile b/Makefile index 5932dfb2..9f86ef23 100644 --- a/Makefile +++ b/Makefile @@ -301,6 +301,9 @@ clean-tests: rm -rf test-dir rm -rf core/http/backend-assets +halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually + ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {} + ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -365,13 +368,13 @@ run-e2e-image: run-e2e-aio: @echo 'Running e2e AIO tests' - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio test-e2e: @echo 'Running e2e tests' BUILD_TYPE=$(BUILD_TYPE) \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e teardown-e2e: rm -rf $(TEST_DIR) || true @@ -379,15 +382,15 @@ teardown-e2e: test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath 
./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) test-tts: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ @@ -636,7 +639,10 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ - +# EXPERIMENTAL: +ifeq ($(BUILD_TYPE),metal) + cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/ +endif backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index fdfaa974..b38d5b9f 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) { // AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? 
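A note on the helper this hunk touches: audioToWav shells out to ffmpeg to produce the 16 kHz, mono, signed 16-bit PCM WAV that whisper.cpp expects. A minimal self-contained sketch of that call, assuming only that ffmpeg is on the PATH (the flags are copied verbatim from the hunk; audioToWavSketch and the file names are illustrative, and runCommand in the real file is roughly equivalent to CombinedOutput here):

package main

import (
	"fmt"
	"os/exec"
)

// Convert src to a 16 kHz mono pcm_s16le WAV at dst by invoking ffmpeg,
// returning ffmpeg's combined output inside the error to aid debugging.
func audioToWavSketch(src, dst string) error {
	cmd := exec.Command("ffmpeg", "-i", src, "-format", "s16le",
		"-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}

func main() {
	if err := audioToWavSketch("speech.ogg", "speech.wav"); err != nil {
		fmt.Println(err)
	}
}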
func audioToWav(src, dst string) error { - command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} out, err := runCommand(command) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { - res := schema.Result{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { + res := schema.TranscriptionResult{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index ac93be01..a9a62d24 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 03ff90b9..2c63dedc 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -2,14 +2,100 @@ package backend import ( "fmt" + "time" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" ) -func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { +type EmbeddingsBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig +} + +func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService { + return &EmbeddingsBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, + } +} + +func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { + + resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) + go func(request *schema.OpenAIRequest) { + if request.Model == "" { + request.Model = model.StableDiffusionBackend + } + + bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + items := []schema.Item{} + + for i, s := range bc.InputToken { + // get the model function to call for the result + embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + embeddings, err := embedFn() + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, 
Object: "embedding"}) + } + + for i, s := range bc.InputStrings { + // get the model function to call for the result + embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + embeddings, err := embedFn() + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + Data: items, + Object: "list", + } + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} + close(resultChannel) + }(request) + return resultChannel +} + +func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { modelFile := backendConfig.Model grpcOpts := gRPCModelOpts(backendConfig) diff --git a/core/backend/image.go b/core/backend/image.go index b0cffb0b..affb3bb3 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -1,18 +1,252 @@ package backend import ( - "github.com/go-skynet/LocalAI/core/config" + "bufio" + "encoding/base64" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "time" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" + "github.com/rs/zerolog/log" + + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" ) -func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { +type ImageGenerationBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig + BaseUrlForGeneratedImages string +} + +func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService { + return &ImageGenerationBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, + } +} + +func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { + resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) + go func(request *schema.OpenAIRequest) { + bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + src := "" + if request.File != "" { + + var fileData []byte + // check if input.File is an URL, if so download it and save it + // to a temporary file + if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") { + out, err := downloadFile(request.File) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)} + close(resultChannel) + 
return + } + defer os.RemoveAll(out) + + fileData, err = os.ReadFile(out) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)} + close(resultChannel) + return + } + + } else { + // base 64 decode the file and write it somewhere + // that we will cleanup + fileData, err = base64.StdEncoding.DecodeString(request.File) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + } + + // Create a temporary file + outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64") + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + // write the base64 result + writer := bufio.NewWriter(outputFile) + _, err = writer.Write(fileData) + if err != nil { + outputFile.Close() + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + outputFile.Close() + src = outputFile.Name() + defer os.RemoveAll(src) + } + + log.Debug().Msgf("Parameter Config: %+v", bc) + + switch bc.Backend { + case "stablediffusion": + bc.Backend = model.StableDiffusionBackend + case "tinydream": + bc.Backend = model.TinyDreamBackend + case "": + bc.Backend = model.StableDiffusionBackend + if bc.Model == "" { + bc.Model = "stablediffusion_assets" // TODO: check? + } + } + + sizeParts := strings.Split(request.Size, "x") + if len(sizeParts) != 2 { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + width, err := strconv.Atoi(sizeParts[0]) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + height, err := strconv.Atoi(sizeParts[1]) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} + close(resultChannel) + return + } + + b64JSON := false + if request.ResponseFormat.Type == "b64_json" { + b64JSON = true + } + // src and clip_skip + var result []schema.Item + for _, i := range bc.PromptStrings { + n := request.N + if request.N == 0 { + n = 1 + } + for j := 0; j < n; j++ { + prompts := strings.Split(i, "|") + positive_prompt := prompts[0] + negative_prompt := "" + if len(prompts) > 1 { + negative_prompt = prompts[1] + } + + mode := 0 + step := bc.Step + if step == 0 { + step = 15 + } + + if request.Mode != 0 { + mode = request.Mode + } + + if request.Step != 0 { + step = request.Step + } + + tempDir := "" + if !b64JSON { + tempDir = igbs.appConfig.ImageDir + } + // Create a temporary file + outputFile, err := os.CreateTemp(tempDir, "b64") + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + outputFile.Close() + output := outputFile.Name() + ".png" + // Rename the temporary file + err = os.Rename(outputFile.Name(), output) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + if request.Seed == nil { + zVal := 0 // Idiomatic way to do this? Actually needed? 
+ request.Seed = &zVal + } + + fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + if err := fn(); err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + + item := &schema.Item{} + + if b64JSON { + defer os.RemoveAll(output) + data, err := os.ReadFile(output) + if err != nil { + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} + close(resultChannel) + return + } + item.B64JSON = base64.StdEncoding.EncodeToString(data) + } else { + base := filepath.Base(output) + item.URL = igbs.BaseUrlForGeneratedImages + base + } + + result = append(result, *item) + } + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Data: result, + } + resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} + close(resultChannel) + }(request) + return resultChannel +} + +func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { + threads := backendConfig.Threads if *threads == 0 && appConfig.Threads != 0 { threads = &appConfig.Threads } + gRPCOpts := gRPCModelOpts(backendConfig) + opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), @@ -50,3 +284,24 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat return fn, nil } + +// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change. +func downloadFile(url string) (string, error) { + // Get the data + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // Create the file + out, err := os.CreateTemp("", "image") + if err != nil { + return "", err + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + return out.Name(), err +} diff --git a/core/backend/llm.go b/core/backend/llm.go index 493dc25c..1878e87a 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -11,17 +11,22 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" + "github.com/rs/zerolog/log" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -type LLMResponse struct { - Response string // should this be []byte? - Usage TokenUsage +type LLMRequest struct { + Id int // TODO Remove if not used. + Text string + Images []string + RawMessages []schema.Message + // TODO: Other Modalities? 
} type TokenUsage struct { @@ -29,57 +34,94 @@ type TokenUsage struct { Completion int } -func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { - modelFile := c.Model - threads := c.Threads - if *threads == 0 && o.Threads != 0 { - threads = &o.Threads +type LLMResponse struct { + Request *LLMRequest + Response string // should this be []byte? + Usage TokenUsage +} + +// TODO: Does this belong here or in core/services/openai.go? +type LLMResponseBundle struct { + Request *schema.OpenAIRequest + Response []schema.Choice + Usage TokenUsage +} + +type LLMBackendService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig + ftMutex sync.Mutex + cutstrings map[string]*regexp.Regexp +} + +func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService { + return &LLMBackendService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + ftMutex: sync.Mutex{}, + cutstrings: make(map[string]*regexp.Regexp), } - grpcOpts := gRPCModelOpts(c) +} + +// TODO: Should ctx param be removed and replaced with hardcoded req.Context? +func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) ( + resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) { + + threads := bc.Threads + if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 { + threads = &llmbs.appConfig.Threads + } + + grpcOpts := gRPCModelOpts(bc) var inferenceModel grpc.Backend - var err error - opts := modelOpts(c, o, []model.Option{ + opts := modelOpts(bc, llmbs.appConfig, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup - model.WithAssetDir(o.AssetsDestination), - model.WithModel(modelFile), - model.WithContext(o.Context), + model.WithAssetDir(llmbs.appConfig.AssetsDestination), + model.WithModel(bc.Model), + model.WithContext(llmbs.appConfig.Context), }) - if c.Backend != "" { - opts = append(opts, model.WithBackendString(c.Backend)) + if bc.Backend != "" { + opts = append(opts, model.WithBackendString(bc.Backend)) } - // Check if the modelFile exists, if it doesn't try to load it from the gallery - if o.AutoloadGalleries { // experimental - if _, err := os.Stat(modelFile); os.IsNotExist(err) { + // Check if bc.Model exists, if it doesn't try to load it from the gallery + if llmbs.appConfig.AutoloadGalleries { // experimental + if _, err := os.Stat(bc.Model); os.IsNotExist(err) { utils.ResetDownloadTimers() // if we failed to load the model, we try to download it - err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) + err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) if err != nil { - return nil, err + return nil, nil, err } } } - if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(opts...) 
+ if bc.Backend == "" { + log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model) + inferenceModel, err = llmbs.ml.GreedyLoader(opts...) } else { - inferenceModel, err = loader.BackendLoader(opts...) + inferenceModel, err = llmbs.ml.BackendLoader(opts...) } if err != nil { - return nil, err + log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend") + return } - var protoMessages []*proto.Message - // if we are using the tokenizer template, we need to convert the messages to proto messages - // unless the prompt has already been tokenized (non-chat endpoints + functions) - if c.TemplateConfig.UseTokenizerTemplate && s == "" { - protoMessages = make([]*proto.Message, len(messages), len(messages)) - for i, message := range messages { + grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath) + grpcPredOpts.Prompt = req.Text + grpcPredOpts.Images = req.Images + + if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" { + grpcPredOpts.UseTokenizerTemplate = true + protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages)) + for i, message := range req.RawMessages { protoMessages[i] = &proto.Message{ Role: message.Role, } @@ -87,47 +129,32 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im case string: protoMessages[i].Content = ct default: - return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct) + err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct) + return } } } - // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported - fn := func() (LLMResponse, error) { - opts := gRPCPredictOpts(c, loader.ModelPath) - opts.Prompt = s - opts.Messages = protoMessages - opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate - opts.Images = images + tokenUsage := TokenUsage{} - tokenUsage := TokenUsage{} + promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts) + if pErr == nil && promptInfo.Length > 0 { + tokenUsage.Prompt = int(promptInfo.Length) + } - // check the per-model feature flag for usage, since tokenCallback may have a cost. - // Defaults to off as for now it is still experimental - if c.FeatureFlag.Enabled("usage") { - userTokenCallback := tokenCallback - if userTokenCallback == nil { - userTokenCallback = func(token string, usage TokenUsage) bool { - return true - } - } + rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse]) + // TODO this next line is the biggest argument for taking named return values _back_ out!!! + var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse] - promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts) - if pErr == nil && promptInfo.Length > 0 { - tokenUsage.Prompt = int(promptInfo.Length) - } + if enableTokenChannel { + rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse]) - tokenCallback = func(token string, usage TokenUsage) bool { - tokenUsage.Completion++ - return userTokenCallback(token, tokenUsage) - } - } - - if tokenCallback != nil { - ss := "" + // TODO Needs better name + ss := "" + go func() { var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { + err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) { partialRune = append(partialRune, chars...) 
for len(partialRune) > 0 { @@ -137,48 +164,120 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im - break - } - tokenCallback(string(r), tokenUsage) + tokenUsage.Completion++ + rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: string(r), + Usage: tokenUsage, + }} + ss += string(r) partialRune = partialRune[size:] } }) - return LLMResponse{ - Response: ss, - Usage: tokenUsage, - }, err - } else { - // TODO: Is the chicken bit the only way to get here? is that acceptable? - reply, err := inferenceModel.Predict(ctx, opts) + close(rawTokenChannel) if err != nil { - return LLMResponse{}, err + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} + } else { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: ss, + Usage: tokenUsage, + }} } - return LLMResponse{ - Response: string(reply.Message), - Usage: tokenUsage, - }, err - } + close(rawResultChannel) + }() + } else { + go func() { + reply, err := inferenceModel.Predict(ctx, grpcPredOpts) + if err != nil { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} + close(rawResultChannel) + } else { + rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ + Response: string(reply.Message), + Usage: tokenUsage, + }} + close(rawResultChannel) + } + }() } - return fn, nil + resultChannel = rawResultChannel + tokenChannel = rawTokenChannel + return } -var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp) -var mu sync.Mutex = sync.Mutex{} +// TODO: Should predInput be a separate param still, or should this fn handle extracting it from request?? +func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig, + mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) ( + // Returns: + resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) { -func Finetune(config config.BackendConfig, input, prediction string) string { + rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle]) + resultChannel = rawChannel + + if request.N == 0 { // number of completions to return + request.N = 1 + } + images := []string{} + for _, m := range request.Messages { + images = append(images, m.StringImages...) + } + + for i := 0; i < request.N; i++ { + + individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{ + Text: predInput, + Images: images, + RawMessages: request.Messages, + }, bc, enableTokenChannels) + if infErr != nil { + err = infErr // Avoids complaints about redeclaring err but looks dumb + return + } + completionChannels = append(completionChannels, individualResultChannel) + tokenChannels = append(tokenChannels, tokenChannel) + } + + go func() { + initialBundle := LLMResponseBundle{ + Request: request, + Response: []schema.Choice{}, + Usage: TokenUsage{}, + } + + wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] { + if iv.Error != nil { + ov.Error = iv.Error + // TODO: Decide if we should wipe partials or not?
+ return ov + } + ov.Value.Usage.Prompt += iv.Value.Usage.Prompt + ov.Value.Usage.Completion += iv.Value.Usage.Completion + + ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value)) + return ov + }, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true) + wg.Wait() + + }() + + return +} + +func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string { if config.Echo { prediction = input + prediction } for _, c := range config.Cutstrings { - mu.Lock() - reg, ok := cutstrings[c] + llmbs.ftMutex.Lock() + reg, ok := llmbs.cutstrings[c] if !ok { - cutstrings[c] = regexp.MustCompile(c) - reg = cutstrings[c] + llmbs.cutstrings[c] = regexp.MustCompile(c) + reg = llmbs.cutstrings[c] } - mu.Unlock() + llmbs.ftMutex.Unlock() prediction = reg.ReplaceAllString(prediction, "") } diff --git a/core/backend/options.go b/core/backend/options.go index 5b303b05..0b4e56db 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -10,7 +10,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" ) -func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { +func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { if so.SingleBackend { opts = append(opts, model.WithSingleActiveBackend()) } @@ -19,12 +19,12 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode opts = append(opts, model.EnableParallelRequests) } - if c.GRPC.Attempts != 0 { - opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts)) + if bc.GRPC.Attempts != 0 { + opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts)) } - if c.GRPC.AttemptsSleepTime != 0 { - opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) + if bc.GRPC.AttemptsSleepTime != 0 { + opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime)) } for k, v := range so.ExternalGRPCBackends { @@ -34,7 +34,7 @@ func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []mode return opts } -func getSeed(c config.BackendConfig) int32 { +func getSeed(c *config.BackendConfig) int32 { seed := int32(*c.Seed) if seed == config.RAND_SEED { seed = rand.Int31() @@ -43,7 +43,7 @@ func getSeed(c config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { +func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch @@ -104,47 +104,47 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { } } -func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { +func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions { promptCachePath := "" - if c.PromptCachePath != "" { - p := filepath.Join(modelPath, c.PromptCachePath) + if bc.PromptCachePath != "" { + p := filepath.Join(modelPath, bc.PromptCachePath) os.MkdirAll(filepath.Dir(p), 0755) promptCachePath = p } return &pb.PredictOptions{ - Temperature: float32(*c.Temperature), - TopP: float32(*c.TopP), - NDraft: c.NDraft, - TopK: int32(*c.TopK), - Tokens: int32(*c.Maxtokens), - Threads: int32(*c.Threads), - PromptCacheAll: c.PromptCacheAll, - PromptCacheRO: c.PromptCacheRO, + Temperature: float32(*bc.Temperature), + TopP: float32(*bc.TopP), + NDraft: bc.NDraft, + TopK: int32(*bc.TopK), + Tokens: int32(*bc.Maxtokens), + Threads: int32(*bc.Threads), + PromptCacheAll: bc.PromptCacheAll, + PromptCacheRO: bc.PromptCacheRO, PromptCachePath: 
promptCachePath,
- F16KV: *c.F16,
- DebugMode: *c.Debug,
- Grammar: c.Grammar,
- NegativePromptScale: c.NegativePromptScale,
- RopeFreqBase: c.RopeFreqBase,
- RopeFreqScale: c.RopeFreqScale,
- NegativePrompt: c.NegativePrompt,
- Mirostat: int32(*c.LLMConfig.Mirostat),
- MirostatETA: float32(*c.LLMConfig.MirostatETA),
- MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
- Debug: *c.Debug,
- StopPrompts: c.StopWords,
- Repeat: int32(c.RepeatPenalty),
- NKeep: int32(c.Keep),
- Batch: int32(c.Batch),
- IgnoreEOS: c.IgnoreEOS,
- Seed: getSeed(c),
- FrequencyPenalty: float32(c.FrequencyPenalty),
- MLock: *c.MMlock,
- MMap: *c.MMap,
- MainGPU: c.MainGPU,
- TensorSplit: c.TensorSplit,
- TailFreeSamplingZ: float32(*c.TFZ),
- TypicalP: float32(*c.TypicalP),
+ F16KV: *bc.F16,
+ DebugMode: *bc.Debug,
+ Grammar: bc.Grammar,
+ NegativePromptScale: bc.NegativePromptScale,
+ RopeFreqBase: bc.RopeFreqBase,
+ RopeFreqScale: bc.RopeFreqScale,
+ NegativePrompt: bc.NegativePrompt,
+ Mirostat: int32(*bc.LLMConfig.Mirostat),
+ MirostatETA: float32(*bc.LLMConfig.MirostatETA),
+ MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
+ Debug: *bc.Debug,
+ StopPrompts: bc.StopWords,
+ Repeat: int32(bc.RepeatPenalty),
+ NKeep: int32(bc.Keep),
+ Batch: int32(bc.Batch),
+ IgnoreEOS: bc.IgnoreEOS,
+ Seed: getSeed(bc),
+ FrequencyPenalty: float32(bc.FrequencyPenalty),
+ MLock: *bc.MMlock,
+ MMap: *bc.MMap,
+ MainGPU: bc.MainGPU,
+ TensorSplit: bc.TensorSplit,
+ TailFreeSamplingZ: float32(*bc.TFZ),
+ TypicalP: float32(*bc.TypicalP),
}
}
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 4c3859df..6761c2ac 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -7,11 +7,48 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
+ "github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
- model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/pkg/model"
)
-func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
+type TranscriptionBackendService struct {
+ ml *model.ModelLoader
+ bcl *config.BackendConfigLoader
+ appConfig *config.ApplicationConfig
+}
+
+func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService {
+ return &TranscriptionBackendService{
+ ml: ml,
+ bcl: bcl,
+ appConfig: appConfig,
+ }
+}
+
+func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] {
+ responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult])
+ go func(request *schema.OpenAIRequest) {
+ bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig)
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request: %w", err)}
+ close(responseChannel)
+ return
+ }
+
+ tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig)
+ if err != nil {
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err}
+ close(responseChannel)
+ return
+ }
+ responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr}
+ close(responseChannel)
+ }(request)
+ return responseChannel
+}
+
+func modelTranscription(audio, language
string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) { opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(model.WhisperBackend), diff --git a/core/backend/tts.go b/core/backend/tts.go index f97b6202..d1fa270d 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -7,29 +7,60 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -func generateUniqueFileName(dir, baseName, ext string) string { - counter := 1 - fileName := baseName + ext +type TextToSpeechBackendService struct { + ml *model.ModelLoader + bcl *config.BackendConfigLoader + appConfig *config.ApplicationConfig +} - for { - filePath := filepath.Join(dir, fileName) - _, err := os.Stat(filePath) - if os.IsNotExist(err) { - return fileName - } - - counter++ - fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) +func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService { + return &TextToSpeechBackendService{ + ml: ml, + bcl: bcl, + appConfig: appConfig, } } -func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { +func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] { + responseChannel := make(chan concurrency.ErrorOr[*string]) + go func(request *schema.TTSRequest) { + cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath, + config.LoadOptionDebug(ttsbs.appConfig.Debug), + config.LoadOptionThreads(ttsbs.appConfig.Threads), + config.LoadOptionContextSize(ttsbs.appConfig.ContextSize), + config.LoadOptionF16(ttsbs.appConfig.F16), + ) + if err != nil { + responseChannel <- concurrency.ErrorOr[*string]{Error: err} + close(responseChannel) + return + } + + if request.Backend != "" { + cfg.Backend = request.Backend + } + + outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg) + if err != nil { + responseChannel <- concurrency.ErrorOr[*string]{Error: err} + close(responseChannel) + return + } + responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile} + close(responseChannel) + }(request) + return responseChannel +} + +func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -37,7 +68,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, grpcOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ + opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(bb), model.WithModel(modelFile), model.WithContext(appConfig.Context), @@ -87,3 +118,19 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, return filePath, res, err } + +func generateUniqueFileName(dir, baseName, ext string) string { + 
counter := 1 + fileName := baseName + ext + + for { + filePath := filepath.Join(dir, fileName) + _, err := os.Stat(filePath) + if os.IsNotExist(err) { + return fileName + } + + counter++ + fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) + } +} diff --git a/core/cli/run.go b/core/cli/run.go index 09d09979..c3b186c0 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error { } if r.PreloadBackendOnly { - _, _, _, err := startup.Startup(opts...) + _, err := startup.Startup(opts...) return err } - cl, ml, options, err := startup.Startup(opts...) + application, err := startup.Startup(opts...) if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) @@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error { // Watch the configuration directory // If the directory does not exist, we don't watch it if _, err := os.Stat(r.LocalaiConfigDir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) + closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig) defer closeConfigWatcherFn() if err != nil { @@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error { } } - appHTTP, err := http.App(cl, ml, options) + appHTTP, err := http.App(application) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/cli/transcript.go b/core/cli/transcript.go index 9f36a77c..f14a1a87 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -7,6 +7,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -43,11 +44,21 @@ func (t *TranscriptCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) - if err != nil { - return err + tbs := backend.NewTranscriptionBackendService(ml, cl, opts) + + resultChannel := tbs.Transcribe(&schema.OpenAIRequest{ + PredictionOptions: schema.PredictionOptions{ + Language: t.Language, + }, + File: t.Filename, + }) + + r := <-resultChannel + + if r.Error != nil { + return r.Error } - for _, segment := range tr.Segments { + for _, segment := range r.Value.Segments { fmt.Println(segment.Start.String(), "-", segment.Text) } return nil diff --git a/core/cli/tts.go b/core/cli/tts.go index 1d8fd3a3..c7758c48 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -9,6 +9,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -42,20 +43,29 @@ func (t *TTSCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - options := config.BackendConfig{} - options.SetDefaults() + ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts) - filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) - if err != nil { - return err + request := &schema.TTSRequest{ + Model: t.Model, + Input: text, + Backend: t.Backend, + Voice: t.Voice, + } + + resultsChannel := ttsbs.TextToAudioFile(request) + + rawResult := <-resultsChannel + + if rawResult.Error != nil { + return rawResult.Error } if outputFile != "" { - if err := os.Rename(filePath, outputFile); err != nil { + if err := os.Rename(*rawResult.Value, outputFile); err != nil { return err } - 
fmt.Printf("Generate file %s\n", outputFile) + fmt.Printf("Generated file %q\n", outputFile) } else { - fmt.Printf("Generate file %s\n", filePath) + fmt.Printf("Generated file %q\n", *rawResult.Value) } return nil } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 81c92d01..47e4829d 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -1,22 +1,7 @@ package config import ( - "errors" - "fmt" - "io/fs" - "os" - "path/filepath" - "sort" - "strings" - "sync" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/downloader" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" - "gopkg.in/yaml.v3" - - "github.com/charmbracelet/glamour" ) const ( @@ -199,7 +184,7 @@ func (c *BackendConfig) FunctionToCall() string { } func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { - lo := &LoadOptions{} + lo := &ConfigLoaderOptions{} lo.Apply(opts...) ctx := lo.ctxSize @@ -312,287 +297,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.Debug = &trueV } } - -////// Config Loader //////// - -type BackendConfigLoader struct { - configs map[string]BackendConfig - sync.Mutex -} - -type LoadOptions struct { - debug bool - threads, ctxSize int - f16 bool -} - -func LoadOptionDebug(debug bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.debug = debug - } -} - -func LoadOptionThreads(threads int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.threads = threads - } -} - -func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.ctxSize = ctxSize - } -} - -func LoadOptionF16(f16 bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.f16 = f16 - } -} - -type ConfigLoaderOption func(*LoadOptions) - -func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { - for _, l := range options { - l(lo) - } -} - -// Load a config file for a model -func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - - // Load a config file if present after the model name - cfg := &BackendConfig{ - PredictionOptions: schema.PredictionOptions{ - Model: modelName, - }, - } - - cfgExisting, exists := cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } else { - // Try loading a model config file - modelConfig := filepath.Join(modelPath, modelName+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := cl.LoadBackendConfig( - modelConfig, opts..., - ); err != nil { - return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } - } - } - - cfg.SetDefaults(opts...) - - return cfg, nil -} - -func NewBackendConfigLoader() *BackendConfigLoader { - return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), - } -} -func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - c := &[]*BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - for _, cc := range *c { - cc.SetDefaults(opts...) - } - - return *c, nil -} - -func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) 
- - c := &BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - c.SetDefaults(opts...) - return c, nil -} - -func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - c, err := ReadBackendConfigFile(file, opts...) - if err != nil { - return fmt.Errorf("cannot load config file: %w", err) - } - - for _, cc := range c { - cm.configs[cc.Name] = *cc - } - return nil -} - -func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { - cl.Lock() - defer cl.Unlock() - c, err := ReadBackendConfig(file, opts...) - if err != nil { - return fmt.Errorf("cannot read config file: %w", err) - } - - cl.configs[c.Name] = *c - return nil -} - -func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { - cl.Lock() - defer cl.Unlock() - v, exists := cl.configs[m] - return v, exists -} - -func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { - cl.Lock() - defer cl.Unlock() - var res []BackendConfig - for _, v := range cl.configs { - res = append(res, v) - } - - sort.SliceStable(res, func(i, j int) bool { - return res[i].Name < res[j].Name - }) - - return res -} - -func (cl *BackendConfigLoader) ListBackendConfigs() []string { - cl.Lock() - defer cl.Unlock() - var res []string - for k := range cl.configs { - res = append(res, k) - } - return res -} - -// Preload prepare models if they are not local but url or huggingface repositories -func (cl *BackendConfigLoader) Preload(modelPath string) error { - cl.Lock() - defer cl.Unlock() - - status := func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - } - - log.Info().Msgf("Preloading models from %s", modelPath) - - renderMode := "dark" - if os.Getenv("COLOR") != "" { - renderMode = os.Getenv("COLOR") - } - - glamText := func(t string) { - out, err := glamour.Render(t, renderMode) - if err == nil && os.Getenv("NO_COLOR") == "" { - fmt.Println(out) - } else { - fmt.Println(t) - } - } - - for i, config := range cl.configs { - - // Download files and verify their SHA - for _, file := range config.DownloadFiles { - log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) - - if err := utils.VerifyPath(file.Filename, modelPath); err != nil { - return err - } - // Create file path - filePath := filepath.Join(modelPath, file.Filename) - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { - return err - } - } - - modelURL := config.PredictionOptions.Model - modelURL = downloader.ConvertURL(modelURL) - - if downloader.LooksLikeURL(modelURL) { - // md5 of model name - md5Name := utils.MD5(modelURL) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.PredictionOptions.Model = md5Name - cl.configs[i] = *c - } - if cl.configs[i].Name != "" { - glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) - } - if cl.configs[i].Description != "" { - //glamText("**Description**") - glamText(cl.configs[i].Description) - } - if cl.configs[i].Usage != "" { - //glamText("**Usage**") 
- glamText(cl.configs[i].Usage) - } - } - return nil -} - -// LoadBackendConfigsFromPath reads all the configurations of the models from a path -// (non-recursive) -func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - entries, err := os.ReadDir(path) - if err != nil { - return err - } - files := make([]fs.FileInfo, 0, len(entries)) - for _, entry := range entries { - info, err := entry.Info() - if err != nil { - return err - } - files = append(files, info) - } - for _, file := range files { - // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { - continue - } - c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) - if err == nil { - cm.configs[c.Name] = *c - } - } - - return nil -} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go new file mode 100644 index 00000000..62dfc1e0 --- /dev/null +++ b/core/config/backend_config_loader.go @@ -0,0 +1,509 @@ +package config + +import ( + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/charmbracelet/glamour" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" + "gopkg.in/yaml.v2" +) + +type BackendConfigLoader struct { + configs map[string]BackendConfig + sync.Mutex +} + +type ConfigLoaderOptions struct { + debug bool + threads, ctxSize int + f16 bool +} + +func LoadOptionDebug(debug bool) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.debug = debug + } +} + +func LoadOptionThreads(threads int) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.threads = threads + } +} + +func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.ctxSize = ctxSize + } +} + +func LoadOptionF16(f16 bool) ConfigLoaderOption { + return func(o *ConfigLoaderOptions) { + o.f16 = f16 + } +} + +type ConfigLoaderOption func(*ConfigLoaderOptions) + +func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) { + for _, l := range options { + l(lo) + } +} + +func NewBackendConfigLoader() *BackendConfigLoader { + return &BackendConfigLoader{ + configs: make(map[string]BackendConfig), + } +} + +func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { + bcl.Lock() + defer bcl.Unlock() + c, err := readBackendConfig(file, opts...) 
+ if err != nil {
+ return fmt.Errorf("cannot read config file: %w", err)
+ }
+
+ bcl.configs[c.Name] = *c
+ return nil
+}
+
+func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+ bcl.Lock()
+ defer bcl.Unlock()
+ v, exists := bcl.configs[m]
+ return v, exists
+}
+
+func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+ bcl.Lock()
+ defer bcl.Unlock()
+ var res []BackendConfig
+ for _, v := range bcl.configs {
+ res = append(res, v)
+ }
+ sort.SliceStable(res, func(i, j int) bool {
+ return res[i].Name < res[j].Name
+ })
+ return res
+}
+
+func (bcl *BackendConfigLoader) ListBackendConfigs() []string {
+ bcl.Lock()
+ defer bcl.Unlock()
+ var res []string
+ for k := range bcl.configs {
+ res = append(res, k)
+ }
+ return res
+}
+
+// Preload prepares models that are not local, e.g. referenced by URL or hosted on huggingface repositories
+func (bcl *BackendConfigLoader) Preload(modelPath string) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+
+ status := func(fileName, current, total string, percent float64) {
+ utils.DisplayDownloadFunction(fileName, current, total, percent)
+ }
+
+ log.Info().Msgf("Preloading models from %s", modelPath)
+
+ renderMode := "dark"
+ if os.Getenv("COLOR") != "" {
+ renderMode = os.Getenv("COLOR")
+ }
+
+ glamText := func(t string) {
+ out, err := glamour.Render(t, renderMode)
+ if err == nil && os.Getenv("NO_COLOR") == "" {
+ fmt.Println(out)
+ } else {
+ fmt.Println(t)
+ }
+ }
+
+ for i, config := range bcl.configs {
+
+ // Download files and verify their SHA
+ for _, file := range config.DownloadFiles {
+ log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
+
+ if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
+ return err
+ }
+ // Create file path
+ filePath := filepath.Join(modelPath, file.Filename)
+
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+ return err
+ }
+ }
+
+ modelURL := config.PredictionOptions.Model
+ modelURL = downloader.ConvertURL(modelURL)
+
+ if downloader.LooksLikeURL(modelURL) {
+ // md5 of model name
+ md5Name := utils.MD5(modelURL)
+
+ // check if file exists
+ if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+ if err != nil {
+ return err
+ }
+ }
+
+ cc := bcl.configs[i]
+ c := &cc
+ c.PredictionOptions.Model = md5Name
+ bcl.configs[i] = *c
+ }
+ if bcl.configs[i].Name != "" {
+ glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name))
+ }
+ if bcl.configs[i].Description != "" {
+ //glamText("**Description**")
+ glamText(bcl.configs[i].Description)
+ }
+ if bcl.configs[i].Usage != "" {
+ //glamText("**Usage**")
+ glamText(bcl.configs[i].Usage)
+ }
+ }
+ return nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+ entries, err := os.ReadDir(path)
+ if err != nil {
+ return err
+ }
+ files := make([]fs.FileInfo, 0, len(entries))
+ for _, entry := range entries {
+ info, err := entry.Info()
+ if err != nil {
+ return err
+ }
+ files = append(files, info)
+ }
+ for _, file := range files {
+ // Skip anything that is not a YAML config file (templates, .keep files, etc.)
+ if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
+ continue
+ }
+ c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...)
+ if err == nil {
+ bcl.configs[c.Name] = *c
+ }
+ }
+
+ return nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
+ bcl.Lock()
+ defer bcl.Unlock()
+ c, err := readBackendConfigFile(file, opts...)
+ if err != nil {
+ return fmt.Errorf("cannot load config file: %w", err)
+ }
+
+ for _, cc := range c {
+ bcl.configs[cc.Name] = *cc
+ }
+ return nil
+}
+
+//////////
+
+// Load a config file for a model
+func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+
+ // Load a config file if present after the model name
+ cfg := &BackendConfig{
+ PredictionOptions: schema.PredictionOptions{
+ Model: modelName,
+ },
+ }
+
+ cfgExisting, exists := bcl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ } else {
+ // Try loading a model config file
+ modelConfig := filepath.Join(modelPath, modelName+".yaml")
+ if _, err := os.Stat(modelConfig); err == nil {
+ if err := bcl.LoadBackendConfig(modelConfig, opts...); err != nil {
+ return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+ }
+ cfgExisting, exists = bcl.GetBackendConfig(modelName)
+ if exists {
+ cfg = &cfgExisting
+ }
+ }
+ }
+
+ cfg.SetDefaults(opts...)
+ return cfg, nil
+}
+
+func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+ c := &[]*BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ for _, cc := range *c {
+ cc.SetDefaults(opts...)
+ }
+
+ return *c, nil
+}
+
+func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+ c := &BackendConfig{}
+ f, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read config file: %w", err)
+ }
+ if err := yaml.Unmarshal(f, c); err != nil {
+ return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+ }
+
+ c.SetDefaults(opts...)
+ return c, nil
+}
+
+func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) {
+ cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+ LoadOptionContextSize(appConfig.ContextSize),
+ LoadOptionDebug(appConfig.Debug),
+ LoadOptionF16(appConfig.F16),
+ LoadOptionThreads(appConfig.Threads),
+ )
+
+ // Set the parameters for the language model prediction
+ updateBackendConfigFromOpenAIRequest(cfg, input)
+
+ return cfg, input, err
+}
+
+func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) {
+ if request.Echo {
+ bc.Echo = request.Echo
+ }
+ if request.TopK != nil && *request.TopK != 0 {
+ bc.TopK = request.TopK
+ }
+ if request.TopP != nil && *request.TopP != 0 {
+ bc.TopP = request.TopP
+ }
+
+ if request.Backend != "" {
+ bc.Backend = request.Backend
+ }
+
+ if request.ClipSkip != 0 {
+ bc.Diffusers.ClipSkip = request.ClipSkip
+ }
+
+ if request.ModelBaseName != "" {
+ bc.AutoGPTQ.ModelBaseName = request.ModelBaseName
+ }
+
+ if request.NegativePromptScale != 0 {
+ bc.NegativePromptScale = request.NegativePromptScale
+ }
+
+ if request.UseFastTokenizer {
+ bc.UseFastTokenizer = request.UseFastTokenizer
+ }
+
+ if request.NegativePrompt != "" {
+ bc.NegativePrompt = request.NegativePrompt
+ }
+
+ if request.RopeFreqBase != 0 {
+ bc.RopeFreqBase = request.RopeFreqBase
+ }
+
+ if request.RopeFreqScale != 0 {
+ bc.RopeFreqScale = request.RopeFreqScale
+ }
+
+ if request.Grammar != "" {
+ bc.Grammar = request.Grammar
+ }
+
+ if request.Temperature != nil && *request.Temperature != 0 {
+ bc.Temperature = request.Temperature
+ }
+
+ if request.Maxtokens != nil && *request.Maxtokens != 0 {
+ bc.Maxtokens = request.Maxtokens
+ }
+
+ switch stop := request.Stop.(type) {
+ case string:
+ if stop != "" {
+ bc.StopWords = append(bc.StopWords, stop)
+ }
+ case []interface{}:
+ for _, pp := range stop {
+ if s, ok := pp.(string); ok {
+ bc.StopWords = append(bc.StopWords, s)
+ }
+ }
+ }
+
+ if len(request.Tools) > 0 {
+ for _, tool := range request.Tools {
+ request.Functions = append(request.Functions, tool.Function)
+ }
+ }
+
+ if request.ToolsChoice != nil {
+ var toolChoice grammar.Tool
+ switch content := request.ToolsChoice.(type) {
+ case string:
+ _ = json.Unmarshal([]byte(content), &toolChoice)
+ case map[string]interface{}:
+ dat, _ := json.Marshal(content)
+ _ = json.Unmarshal(dat, &toolChoice)
+ }
+ request.FunctionCall = map[string]interface{}{
+ "name": toolChoice.Function.Name,
+ }
+ }
+
+ // Decode each request's message content
+ index := 0
+ for i, m := range request.Messages {
+ switch content := m.Content.(type) {
+ case string:
+ request.Messages[i].StringContent = content
+ case []interface{}:
+ dat, _ := json.Marshal(content)
+ c := []schema.Content{}
+ json.Unmarshal(dat, &c)
+ for _, pp := range c {
+ if pp.Type == "text" {
+ request.Messages[i].StringContent = pp.Text
+ } else if pp.Type == "image_url" {
+ // Detect if pp.ImageURL is a URL; if it is, download the image and encode it in base64:
+ base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
+ if err == nil {
+ request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
+ // set a placeholder for each image
+ request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent
+ index++
+ } else {
+
fmt.Print("Failed encoding image", err) + } + } + } + } + } + + if request.RepeatPenalty != 0 { + bc.RepeatPenalty = request.RepeatPenalty + } + + if request.FrequencyPenalty != 0 { + bc.FrequencyPenalty = request.FrequencyPenalty + } + + if request.PresencePenalty != 0 { + bc.PresencePenalty = request.PresencePenalty + } + + if request.Keep != 0 { + bc.Keep = request.Keep + } + + if request.Batch != 0 { + bc.Batch = request.Batch + } + + if request.IgnoreEOS { + bc.IgnoreEOS = request.IgnoreEOS + } + + if request.Seed != nil { + bc.Seed = request.Seed + } + + if request.TypicalP != nil { + bc.TypicalP = request.TypicalP + } + + switch inputs := request.Input.(type) { + case string: + if inputs != "" { + bc.InputStrings = append(bc.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + switch i := pp.(type) { + case string: + bc.InputStrings = append(bc.InputStrings, i) + case []interface{}: + tokens := []int{} + for _, ii := range i { + tokens = append(tokens, int(ii.(float64))) + } + bc.InputToken = append(bc.InputToken, tokens) + } + } + } + + // Can be either a string or an object + switch fnc := request.FunctionCall.(type) { + case string: + if fnc != "" { + bc.SetFunctionCallString(fnc) + } + case map[string]interface{}: + var name string + n, exists := fnc["name"] + if exists { + nn, e := n.(string) + if e { + name = nn + } + } + bc.SetFunctionCallNameString(name) + } + + switch p := request.Prompt.(type) { + case string: + bc.PromptStrings = append(bc.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + bc.PromptStrings = append(bc.PromptStrings, s) + } + } + } +} diff --git a/core/config/exports_test.go b/core/config/exports_test.go new file mode 100644 index 00000000..70ba84e6 --- /dev/null +++ b/core/config/exports_test.go @@ -0,0 +1,6 @@ +package config + +// This file re-exports private functions to be used directly in unit tests. +// Since this file's name ends in _test.go, theoretically these should not be exposed past the tests. 
+ +var ReadBackendConfigFile = readBackendConfigFile diff --git a/core/http/api.go b/core/http/api.go index af38512a..5c9095ea 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -1,23 +1,20 @@ package http import ( - "encoding/json" "errors" - "os" "strings" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/core" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" @@ -55,13 +52,12 @@ func readAuthHeader(c *fiber.Ctx) string { // @securityDefinitions.apikey BearerAuth // @in header // @name Authorization - -func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { +func App(application *core.Application) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: appConfig.DisableMessage, + BodyLimit: application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + DisableStartupMessage: application.ApplicationConfig.DisableMessage, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -82,7 +78,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }, }) - if appConfig.Debug { + if application.ApplicationConfig.Debug { app.Use(logger.New(logger.Config{ Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", })) @@ -90,7 +86,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Default middleware config - if !appConfig.Debug { + if !application.ApplicationConfig.Debug { app.Use(recover.New()) } @@ -108,27 +104,27 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Auth middleware checking if API key is valid. If no API key is set, no auth is required. auth := func(c *fiber.Ctx) error { - if len(appConfig.ApiKeys) == 0 { + if len(application.ApplicationConfig.ApiKeys) == 0 { return c.Next() } - // Check for api_keys.json file - fileContent, err := os.ReadFile("api_keys.json") - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err != nil { - return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - } + // // Check for api_keys.json file + // fileContent, err := os.ReadFile("api_keys.json") + // if err == nil { + // // Parse JSON content from the file + // var fileKeys []string + // err := json.Unmarshal(fileContent, &fileKeys) + // if err != nil { + // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) + // } - // Add file keys to options.ApiKeys - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
- } + // // Add file keys to options.ApiKeys + // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...) + // } - if len(appConfig.ApiKeys) == 0 { - return c.Next() - } + // if len(application.ApplicationConfig.ApiKeys) == 0 { + // return c.Next() + // } authHeader := readAuthHeader(c) if authHeader == "" { @@ -142,7 +138,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } apiKey := authHeaderParts[1] - for _, key := range appConfig.ApiKeys { + for _, key := range application.ApplicationConfig.ApiKeys { if apiKey == key { return c.Next() } @@ -151,20 +147,22 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"}) } - if appConfig.CORS { + if application.ApplicationConfig.CORS { var c func(ctx *fiber.Ctx) error - if appConfig.CORSAllowOrigins == "" { + if application.ApplicationConfig.CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins}) } app.Use(c) } + fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig) + // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) + galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath) + galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader) app.Get("/version", auth, func(c *fiber.Ctx) error { return c.JSON(struct { @@ -172,29 +170,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }{Version: internal.PrintableVersion()}) }) - // Make sure directories exists - os.MkdirAll(appConfig.ImageDir, 0755) - os.MkdirAll(appConfig.AudioDir, 0755) - os.MkdirAll(appConfig.UploadDir, 0755) - os.MkdirAll(appConfig.ConfigsDir, 0755) - os.MkdirAll(appConfig.ModelPath, 0755) - - // Load config jsons - utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - app.Get("/swagger/*", swagger.HandlerDefault) // default welcomeRoute( app, - cl, - ml, - appConfig, + application.BackendConfigLoader, + application.ModelLoader, + application.ApplicationConfig, auth, ) - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) @@ -203,83 +189,85 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) - app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - 
// Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - // Stores - sl := model.NewModelLoader("") - app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) + storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now. + app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig)) - // openAI compatible API endpoint + // openAI compatible API endpoints // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) // assistant - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, 
appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + // TODO: Refactor this to the new style eventually + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - 
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) + app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) // embeddings - app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) // images - app.Post("/v1/images/generations", auth, 
openai.ImageEndpoint(cl, ml, appConfig))
+ app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService))

- if appConfig.ImageDir != "" {
- app.Static("/generated-images", appConfig.ImageDir)
+ // Elevenlabs
+ app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+
+ // LocalAI-native TTS endpoint
+ app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService))
+
+ if application.ApplicationConfig.ImageDir != "" {
+ app.Static("/generated-images", application.ApplicationConfig.ImageDir)
}

- if appConfig.AudioDir != "" {
- app.Static("/generated-audio", appConfig.AudioDir)
+ if application.ApplicationConfig.AudioDir != "" {
+ app.Static("/generated-audio", application.ApplicationConfig.AudioDir)
}

ok := func(c *fiber.Ctx) error {
@@ -291,13 +279,12 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Get("/readyz", ok)

// Experimental Backend Statistics Module
- backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
- app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
- app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
+ app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService))
+ app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService))

// models
- app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
- app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService))
+ app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService))

app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())

diff --git a/core/http/api_test.go b/core/http/api_test.go
index 1553ed21..bf8feb1c 100644
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -12,7 +12,9 @@ import (
"os"
"path/filepath"
"runtime"
+ "strings"

+ "github.com/go-skynet/LocalAI/core"
"github.com/go-skynet/LocalAI/core/config"
. "github.com/go-skynet/LocalAI/core/http"
"github.com/go-skynet/LocalAI/core/schema"
@@ -205,9 +207,7 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
- var bcl *config.BackendConfigLoader
- var ml *model.ModelLoader
- var applicationConfig *config.ApplicationConfig
+ var application *core.Application

commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -252,7 +252,7 @@ var _ = Describe("API test", func() {
},
}

- bcl, ml, applicationConfig, err = startup.Startup(
+ application, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -261,7 +261,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -474,11 +474,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -487,9 +487,9 @@ var _ = Describe("API test", func() { }) It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } modelName := "codellama" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", @@ -504,7 +504,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "480s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -551,11 +551,13 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) + fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message) + Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -609,7 +611,7 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -620,7 +622,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -724,14 +726,14 @@ var _ = 
Describe("API test", func() { var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -761,6 +763,11 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions via ggml", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -768,6 +775,11 @@ var _ = Describe("API test", func() { }) It("can generate chat completions via ggml", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -775,6 +787,11 @@ var _ = Describe("API test", func() { }) It("can generate completions from model configs", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -782,6 +799,11 @@ var _ = Describe("API test", func() { }) It("can generate chat completions from model configs", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -868,9 +890,9 @@ var _ = Describe("API test", func() { Context("backends", func() { It("runs rwkv completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices) > 0).To(BeTrue()) @@ -891,17 +913,20 @@ var _ = Describe("API test", func() { } Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Text - tokens++ + + if len(response.Choices) > 0 { + text += response.Choices[0].Text + tokens++ + } } Expect(text).ToNot(BeEmpty()) Expect(text).To(ContainSubstring("five")) Expect(tokens).ToNot(Or(Equal(1), Equal(0))) }) It("runs rwkv chat completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported 
only on linux") - } + // if runtime.GOOS != "linux" { + // Skip("test supported only on linux") + // } resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) Expect(err).ToNot(HaveOccurred()) @@ -1010,14 +1035,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = App(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -1041,18 +1066,33 @@ var _ = Describe("API test", func() { } }) It("can generate chat completions from config file (list1)", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate edit completions from config file", func() { + bt, ok := os.LookupEnv("BUILD_TYPE") + if ok && strings.ToLower(bt) == "metal" { + Skip("GGML + Metal is known flaky, skip test temporarily") + } + request := openaigo.EditCreateRequestBody{ Model: "list2", Instruction: "foo", diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index ffb63111..99fbcde9 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -1,43 +1,88 @@ package fiberContext import ( + "context" + "encoding/json" "fmt" "strings" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) +type FiberContextExtractor struct { + ml *model.ModelLoader + appConfig *config.ApplicationConfig +} + +func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor { + return &FiberContextExtractor{ + ml: ml, + appConfig: appConfig, + } +} + // ModelFromContext returns the model from the context // If no model is specified, it will take the first available // Takes a model string as input which should be the one received from the user request. // It returns the model name resolved from the context and an error if any. 
-func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { - if ctx.Params("model") != "" { - modelInput = ctx.Params("model") +func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) { + ctxPM := ctx.Params("model") + if ctxPM != "" { + log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM) + modelInput = ctxPM } // Set model from bearer token, if available - bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) + bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := loader.ListModels() + models, _ := fce.ml.ListModels() if len(models) > 0 { modelInput = models[0] - log.Debug().Msgf("No model specified, using: %s", modelInput) + log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput) } else { - log.Debug().Msgf("No model specified, returning error") - return "", fmt.Errorf("no model specified") + log.Warn().Msgf("[FCE] No model specified, none available") + return "", fmt.Errorf("[fce] no model specified, none available") } } // If a model is found in bearer token takes precedence if bearerExists { - log.Debug().Msgf("Using model from bearer token: %s", bearer) + log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer) modelInput = bearer } + + if modelInput == "" { + log.Warn().Msg("[FCE] modelInput is empty") + } return modelInput, nil } + +// TODO: Do we still need the first return value? 
+func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) { + input := new(schema.OpenAIRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return "", nil, fmt.Errorf("failed parsing request body: %w", err) + } + + received, _ := json.Marshal(input) + + ctx, cancel := context.WithCancel(fce.appConfig.Context) + input.Context = ctx + input.Cancel = cancel + + log.Debug().Msgf("Request received: %s", string(received)) + + var err error + input.Model, err = fce.ModelFromContext(c, input.Model, firstModel) + + return input.Model, input, err +} diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 841f9b5f..4f5db463 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -2,9 +2,7 @@ package elevenlabs import ( "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -17,7 +15,7 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/text-to-speech/{voice-id} [post] -func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.ElevenLabsTTSRequest) @@ -28,34 +26,21 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) + var err error + input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false) if err != nil { - modelFile = input.ModelID log.Warn().Msgf("Model not found in context: %s", input.ModelID) } - cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, - config.LoadOptionDebug(appConfig.Debug), - config.LoadOptionThreads(appConfig.Threads), - config.LoadOptionContextSize(appConfig.ContextSize), - config.LoadOptionF16(appConfig.F16), - ) - if err != nil { - modelFile = input.ModelID - log.Warn().Msgf("Model not found in context: %s", input.ModelID) - } else { - if input.ModelID != "" { - modelFile = input.ModelID - } else { - modelFile = cfg.Model - } + responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{ + Model: input.ModelID, + Voice: voiceID, + Input: input.Text, + }) + rawValue := <-responseChannel + if rawValue.Error != nil { + return rawValue.Error } - log.Debug().Msgf("Request for model: %s", modelFile) - - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg) - if err != nil { - return err - } - return c.Download(filePath) + return c.Download(*rawValue.Value) } } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 8c7a664a..dac20388 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return 
func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error } } -func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 7822e024..df7841fb 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -2,9 +2,7 @@ package localai import ( "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -16,45 +14,26 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/audio/speech [post] -func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - + var err error input := new(schema.TTSRequest) // Get input data from the request body - if err := c.BodyParser(input); err != nil { + if err = c.BodyParser(input); err != nil { return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + input.Model, err = fce.ModelFromContext(c, input.Model, false) if err != nil { - modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, - config.LoadOptionDebug(appConfig.Debug), - config.LoadOptionThreads(appConfig.Threads), - config.LoadOptionContextSize(appConfig.ContextSize), - config.LoadOptionF16(appConfig.F16), - ) - - if err != nil { - modelFile = input.Model - log.Warn().Msgf("Model not found in context: %s", input.Model) - } else { - modelFile = cfg.Model + responseChannel := ttsbs.TextToAudioFile(input) + rawValue := <-responseChannel + if rawValue.Error != nil { + return rawValue.Error } - log.Debug().Msgf("Request for model: %s", modelFile) - - if input.Backend != "" { - cfg.Backend = input.Backend - } - - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) - if err != nil { - return err - } - return c.Download(filePath) + return c.Download(*rawValue.Value) } } diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index dceb3789..72cb8b4a 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find ")) + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 36d1142b..a240b024 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -5,17 +5,11 @@ import ( "bytes" "encoding/json" "fmt" - "strings" 
- "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,412 +19,82 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { - emptyMessage := "" - id := uuid.New().String() - created := int(time.Now().Unix()) - - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, - } - - responses <- resp - return true - }) - close(responses) - } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - result := "" - _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - result += s - // TODO: Change generated BNF grammar to be compliant with the schema so we can - // stream the result token by token here. - return true - }) - - results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) - noActionToRun := len(results) > 0 && results[0].name == noAction - - switch { - case noActionToRun: - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) - if err != nil { - log.Error().Err(err).Msg("error handling question") - return - } - - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, - } - - responses <- resp - - default: - for i, ss := range results { - name, args := ss.name, ss.arguments - - initialMessage := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - responses <- initialMessage - - responses <- schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - Delta: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Arguments: args, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - } - } - - close(responses) - } - +func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - processFunctions := false - funcs := grammar.Functions{} - modelFile, input, err := readRequest(c, ml, startupOptions, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return fmt.Errorf("failed reading parameters from request: %w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16) + traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) - } - log.Debug().Msgf("Configuration read: %+v", config) - - // Allow the user to set custom actions via config file - // to be "embedded" in each model - noActionName := "answer" - noActionDescription := "use this action to answer without performing any action" - - if config.FunctionsConfig.NoActionFunctionName != "" { - noActionName = config.FunctionsConfig.NoActionFunctionName - } - if config.FunctionsConfig.NoActionDescriptionName != "" { - noActionDescription = config.FunctionsConfig.NoActionDescriptionName + return err } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF - } + if request.Stream { - config.Grammar = input.Grammar + log.Debug().Msgf("Chat Stream request received") - // process functions if we have any defined or if we have a function call string - if len(input.Functions) > 0 && config.ShouldUseFunctions() { - log.Debug().Msgf("Response needs to process functions") - - processFunctions = true - - noActionGrammar := grammar.Function{ - Name: noActionName, - Description: noActionDescription, - Parameters: map[string]interface{}{ - "properties": map[string]interface{}{ - "message": map[string]interface{}{ - "type": "string", - "description": "The message to reply the user with", - }}, - }, - } - - // Append the no action function - funcs = append(funcs, input.Functions...) 
- if !config.FunctionsConfig.DisableNoAction { - funcs = append(funcs, noActionGrammar) - } - - // Force picking one of the functions by the request - if config.FunctionToCall() != "" { - funcs = funcs.Select(config.FunctionToCall()) - } - - // Update input grammar - jsStruct := funcs.ToJSONStructure() - config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) - } else if input.JSONFunctionGrammarObject != nil { - config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) - } - - // functions are not supported in stream mode (yet?) - toStream := input.Stream - - log.Debug().Msgf("Parameters: %+v", config) - - var predInput string - - // If we are using the tokenizer template, we don't need to process the messages - // unless we are processing functions - if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
- if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - predInput = strings.Join(mess, "\n") - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !processFunctions { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && processFunctions { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", config.Grammar) - } - } - - switch { - case toStream: - - log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) - // c.Set("Content-Type", "text/event-stream") + // c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - responses := make(chan schema.OpenAIResponse) - - if !processFunctions { - go process(predInput, input, config, ml, responses) - } else { - go processTools(noActionName, predInput, input, config, ml, responses) - } - c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { usage := &schema.OpenAIUsage{} toolsCalled := false - for ev := range responses { - usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it - if len(ev.Choices[0].Delta.ToolCalls) > 0 { + for ev := range tokenChannel { + if ev.Error != nil { + log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error") + request.Cancel() + break + } + usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + + if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } var buf bytes.Buffer enc := json.NewEncoder(&buf) - enc.Encode(ev) - log.Debug().Msgf("Sending chunk: %s", buf.String()) + if ev.Error != nil { + log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error to debug during tokenChannel handler") + enc.Encode(ev.Error) + } else { + enc.Encode(ev.Value) + } + log.Debug().Msgf("chat streaming sending chunk: %s", buf.String()) _, err := fmt.Fprintf(w, "data: %v\n", buf.String()) if err != nil { - log.Debug().Msgf("Sending chunk failed: %v", err) - 
input.Cancel() + log.Debug().Err(err).Msgf("Sending chunk failed") + request.Cancel() + break + } + err = w.Flush() + if err != nil { + log.Debug().Msg("error while flushing, closing connection") + request.Cancel() break } - w.Flush() } finishReason := "stop" if toolsCalled { finishReason = "tool_calls" - } else if toolsCalled && len(input.Tools) == 0 { + } else if toolsCalled && len(request.Tools) == 0 { finishReason = "function_call" } resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: traceID.ID, + Created: traceID.Created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: &emptyMessage}, + Delta: &schema.Message{Content: ""}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -441,202 +105,21 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup w.WriteString("data: [DONE]\n\n") w.Flush() })) + return nil - - // no streaming mode - default: - result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { - if !processFunctions { - // no function is called, just reply and use stop as finish reason - *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) - return - } - - results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) - noActionsToRun := len(results) > 0 && results[0].name == noActionName - - switch { - case noActionsToRun: - result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) - if err != nil { - log.Error().Err(err).Msg("error handling question") - return - } - *c = append(*c, schema.Choice{ - Message: &schema.Message{Role: "assistant", Content: &result}}) - default: - toolChoice := schema.Choice{ - Message: &schema.Message{ - Role: "assistant", - }, - } - - if len(input.Tools) > 0 { - toolChoice.FinishReason = "tool_calls" - } - - for _, ss := range results { - name, args := ss.name, ss.arguments - if len(input.Tools) > 0 { - // If we are using tools, we condense the function calls into - // a single response choice with all the tools - toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, - schema.ToolCall{ - ID: id, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - Arguments: args, - }, - }, - ) - } else { - // otherwise we return more choices directly - *c = append(*c, schema.Choice{ - FinishReason: "function_call", - Message: &schema.Message{ - Role: "assistant", - FunctionCall: map[string]interface{}{ - "name": name, - "arguments": args, - }, - }, - }) - } - } - - if len(input.Tools) > 0 { - // we need to append our result if we are using tools - *c = append(*c, toolChoice) - } - } - - }, nil) - if err != nil { - return err - } - - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: result, - Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, - } - respData, _ := json.Marshal(resp) - log.Debug().Msgf("Response: %s", respData) - - // Return the prediction in the response body - return c.JSON(resp) } + // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? + rawResponse := <-finalResultChannel + + if rawResponse.Error != nil { + return rawResponse.Error + } + + jsonResult, _ := json.Marshal(rawResponse.Value) + log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response") + + // Return the prediction in the response body + return c.JSON(rawResponse.Value) } } - -func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { - log.Debug().Msgf("nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = backend.Finetune(*config, prompt, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - return message, nil - } - } - } - - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU/GPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) - } - - predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) - if err != nil { - log.Error().Err(err).Msg("model inference failed") - return "", err - } - - prediction, err := predFunc() - if err != nil { - log.Error().Err(err).Msg("prediction failed") - return "", err - } - return backend.Finetune(*config, prompt, prediction.Response), nil -} - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - - return results -} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 69923475..d8b412a9 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -4,18 +4,13 @@ import ( "bufio" "bytes" "encoding/json" - "errors" "fmt" - "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,116 +20,50 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { - id := uuid.New().String() - created := int(time.Now().Unix()) - - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - resp := schema.OpenAIResponse{ - ID: id, - Created: created, - Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{ - { - Index: 0, - Text: s, - }, - }, - Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, - } - log.Debug().Msgf("Sending goroutine: %s", s) - - responses <- resp - return true - }) - close(responses) - } - +func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - log.Debug().Msgf("`input`: %+v", input) + log.Debug().Msgf("`OpenAIRequest`: %+v", request) - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF - } + if request.Stream { + log.Debug().Msgf("Completion Stream request received") - config.Grammar = input.Grammar - - log.Debug().Msgf("Parameter Config: %+v", config) - - if input.Stream { - log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) //c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - } - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Completion != "" { - templateFile = config.TemplateConfig.Completion - } - - if input.Stream { - if len(config.PromptStrings) > 1 { - return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") - } - - predInput := config.PromptStrings[0] - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } - } - - responses := make(chan schema.OpenAIResponse) - - go process(predInput, input, config, ml, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - - for ev := range responses { + for ev := range tokenChannel { var buf bytes.Buffer enc := json.NewEncoder(&buf) - enc.Encode(ev) + if ev.Error != nil { + log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error) + enc.Encode(ev.Error) + } else { + enc.Encode(ev.Value) + } - log.Debug().Msgf("Sending chunk: %s", buf.String()) + log.Debug().Msgf("completion streaming sending chunk: %s", buf.String()) fmt.Fprintf(w, "data: %v\n", buf.String()) w.Flush() } resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: traceID.ID, + Created: traceID.Created, + Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{ { Index: 0, @@ -151,55 +80,15 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a })) return nil } - - var result []schema.Choice - - totalTokenUsage := backend.TokenUsage{} - - for k, i := range config.PromptStrings { - if templateFile != "" { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - Input: i, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } - } - - r, tokenUsage, err := ComputeChoices( - input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { - *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) - }, nil) - if err != nil { - return err - } - - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion - - result = append(result, r...) + // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? + rawResponse := <-finalResultChannel + if rawResponse.Error != nil { + return rawResponse.Error } - - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 25497095..a33050dd 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -3,92 +3,36 @@ package openai import ( "encoding/json" "fmt" - "time" - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" - "github.com/google/uuid" "github.com/rs/zerolog/log" ) -func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - log.Debug().Msgf("Parameter Config: %+v", config) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if 
ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model + rawResponse := <-finalResultChannel + if rawResponse.Error != nil { + return rawResponse.Error } - if config.TemplateConfig.Edit != "" { - templateFile = config.TemplateConfig.Edit - } - - var result []schema.Choice - totalTokenUsage := backend.TokenUsage{} - - for _, i := range config.InputStrings { - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ - Input: i, - Instruction: input.Instruction, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } - } - - r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { - *c = append(*c, schema.Choice{Text: s}) - }, nil) - if err != nil { - return err - } - - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion - - result = append(result, r...) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: result, - Object: "edit", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index eca34f79..be546991 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -3,14 +3,9 @@ package openai import ( "encoding/json" "fmt" - "time" "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/pkg/model" - - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -21,63 +16,25 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] -func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readRequest(c, ml, appConfig, true) + _, input, err := fce.OpenAIRequestFromContext(c, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) - if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + responseChannel := ebs.Embeddings(input) + + rawResponse := <-responseChannel + + if rawResponse.Error != nil { + return rawResponse.Error } - log.Debug().Msgf("Parameter Config: %+v", config) - items := []schema.Item{} - - for i, s := 
range config.InputToken { - // get the model function to call for the result - embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig) - if err != nil { - return err - } - - embeddings, err := embedFn() - if err != nil { - return err - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - for i, s := range config.InputStrings { - // get the model function to call for the result - embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig) - if err != nil { - return err - } - - embeddings, err := embedFn() - if err != nil { - return err - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Data: items, - Object: "list", - } - - jsonResult, _ := json.Marshal(resp) + jsonResult, _ := json.Marshal(rawResponse.Value) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 9e806b3e..ec3d84da 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -1,50 +1,18 @@ package openai import ( - "bufio" - "encoding/base64" "encoding/json" "fmt" - "io" - "net/http" - "os" - "path/filepath" - "strconv" - "strings" - "time" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/backend" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -func downloadFile(url string) (string, error) { - // Get the data - resp, err := http.Get(url) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // Create the file - out, err := os.CreateTemp("", "image") - if err != nil { - return "", err - } - defer out.Close() - - // Write the body to file - _, err = io.Copy(out, resp.Body) - return out.Name(), err -} - -// +// https://platform.openai.com/docs/api-reference/images/create /* * @@ -59,186 +27,36 @@ func downloadFile(url string) (string, error) { * */ + // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] -func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, ml, appConfig, false) + // TODO: Somewhat a hack. Is there a better place to assign this? 
+ if igbs.BaseUrlForGeneratedImages == "" { + igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/" + } + _, request, err := fce.OpenAIRequestFromContext(c, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - if m == "" { - m = model.StableDiffusionBackend - } - log.Debug().Msgf("Loading model: %+v", m) + responseChannel := igbs.GenerateImage(request) + rawResponse := <-responseChannel - config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false) + if rawResponse.Error != nil { + return rawResponse.Error + } + + jsonResult, err := json.Marshal(rawResponse.Value) if err != nil { - return fmt.Errorf("failed reading parameters from request:%w", err) + return err } - - src := "" - if input.File != "" { - - fileData := []byte{} - // check if input.File is an URL, if so download it and save it - // to a temporary file - if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") { - out, err := downloadFile(input.File) - if err != nil { - return fmt.Errorf("failed downloading file:%w", err) - } - defer os.RemoveAll(out) - - fileData, err = os.ReadFile(out) - if err != nil { - return fmt.Errorf("failed reading file:%w", err) - } - - } else { - // base 64 decode the file and write it somewhere - // that we will cleanup - fileData, err = base64.StdEncoding.DecodeString(input.File) - if err != nil { - return err - } - } - - // Create a temporary file - outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64") - if err != nil { - return err - } - // write the base64 result - writer := bufio.NewWriter(outputFile) - _, err = writer.Write(fileData) - if err != nil { - outputFile.Close() - return err - } - outputFile.Close() - src = outputFile.Name() - defer os.RemoveAll(src) - } - - log.Debug().Msgf("Parameter Config: %+v", config) - - switch config.Backend { - case "stablediffusion": - config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend - case "": - config.Backend = model.StableDiffusionBackend - } - - sizeParts := strings.Split(input.Size, "x") - if len(sizeParts) != 2 { - return fmt.Errorf("invalid value for 'size'") - } - width, err := strconv.Atoi(sizeParts[0]) - if err != nil { - return fmt.Errorf("invalid value for 'size'") - } - height, err := strconv.Atoi(sizeParts[1]) - if err != nil { - return fmt.Errorf("invalid value for 'size'") - } - - b64JSON := false - if input.ResponseFormat.Type == "b64_json" { - b64JSON = true - } - // src and clip_skip - var result []schema.Item - for _, i := range config.PromptStrings { - n := input.N - if input.N == 0 { - n = 1 - } - for j := 0; j < n; j++ { - prompts := strings.Split(i, "|") - positive_prompt := prompts[0] - negative_prompt := "" - if len(prompts) > 1 { - negative_prompt = prompts[1] - } - - mode := 0 - step := config.Step - if step == 0 { - step = 15 - } - - if input.Mode != 0 { - mode = input.Mode - } - - if input.Step != 0 { - step = input.Step - } - - tempDir := "" - if !b64JSON { - tempDir = appConfig.ImageDir - } - // Create a temporary file - outputFile, err := os.CreateTemp(tempDir, "b64") - if err != nil { - return err - } - outputFile.Close() - output := outputFile.Name() + ".png" - // Rename the temporary file - err = os.Rename(outputFile.Name(), output) - if err != nil { - return err - } - - baseURL := c.BaseURL() - - fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, 
appConfig) - if err != nil { - return err - } - if err := fn(); err != nil { - return err - } - - item := &schema.Item{} - - if b64JSON { - defer os.RemoveAll(output) - data, err := os.ReadFile(output) - if err != nil { - return err - } - item.B64JSON = base64.StdEncoding.EncodeToString(data) - } else { - base := filepath.Base(output) - item.URL = baseURL + "/generated-images/" + base - } - - result = append(result, *item) - } - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Data: result, - } - - jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) - // Return the prediction in the response body - return c.JSON(resp) + return c.JSON(rawResponse.Value) } } diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go deleted file mode 100644 index 06e784b7..00000000 --- a/core/http/endpoints/openai/inference.go +++ /dev/null @@ -1,55 +0,0 @@ -package openai - -import ( - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - - "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" -) - -func ComputeChoices( - req *schema.OpenAIRequest, - predInput string, - config *config.BackendConfig, - o *config.ApplicationConfig, - loader *model.ModelLoader, - cb func(string, *[]schema.Choice), - tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) { - n := req.N // number of completions to return - result := []schema.Choice{} - - if n == 0 { - n = 1 - } - - images := []string{} - for _, m := range req.Messages { - images = append(images, m.StringImages...) - } - - // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) - if err != nil { - return result, backend.TokenUsage{}, err - } - - tokenUsage := backend.TokenUsage{} - - for i := 0; i < n; i++ { - prediction, err := predFunc() - if err != nil { - return result, backend.TokenUsage{}, err - } - - tokenUsage.Prompt += prediction.Usage.Prompt - tokenUsage.Completion += prediction.Usage.Completion - - finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) - cb(finetunedResponse, &result) - - //result = append(result, Choice{Text: prediction}) - - } - return result, tokenUsage, err -} diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 04e611a2..9bb2b2ca 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,61 +1,21 @@ package openai import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, err := ml.ListModels() - if err != nil { - return err - } - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool + // If blank, no filter is applied. 
filter := c.Query("filter") - - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - // By default, exclude any loose files that are already referenced by a configuration file. excludeConfigured := c.QueryBool("excludeConfigured", true) - // Start with the known configurations - for _, c := range cl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } - } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } + dataModels, err := lms.ListModels(filter, excludeConfigured) + if err != nil { + return err } return c.JSON(struct { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go deleted file mode 100644 index 369fb0b8..00000000 --- a/core/http/endpoints/openai/request.go +++ /dev/null @@ -1,285 +0,0 @@ -package openai - -import ( - "context" - "encoding/base64" - "encoding/json" - "fmt" - "io" - "net/http" - "strings" - - "github.com/go-skynet/LocalAI/core/config" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" - model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/gofiber/fiber/v2" - "github.com/rs/zerolog/log" -) - -func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { - input := new(schema.OpenAIRequest) - - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return "", nil, fmt.Errorf("failed parsing request body: %w", err) - } - - received, _ := json.Marshal(input) - - ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx - input.Cancel = cancel - - log.Debug().Msgf("Request received: %s", string(received)) - - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) - - return modelFile, input, err -} - -// this function check if the string is an URL, if it's an URL downloads the image in memory -// encodes it in base64 and returns the base64 string -func getBase64Image(s string) (string, error) { - if strings.HasPrefix(s, "http") { - // download the image - resp, err := http.Get(s) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // read the image data into memory - data, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - // encode the image data in base64 - encoded := base64.StdEncoding.EncodeToString(data) - - // return the base64 string - return encoded, nil - } - - // if the string instead is prefixed with "data:image/jpeg;base64,", drop it - if strings.HasPrefix(s, "data:image/jpeg;base64,") { - return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil - } - return "", fmt.Errorf("not valid string") -} - -func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { - if input.Echo { - config.Echo = input.Echo - } - if input.TopK != nil { - 
-		config.TopK = input.TopK
-	}
-	if input.TopP != nil {
-		config.TopP = input.TopP
-	}
-
-	if input.Backend != "" {
-		config.Backend = input.Backend
-	}
-
-	if input.ClipSkip != 0 {
-		config.Diffusers.ClipSkip = input.ClipSkip
-	}
-
-	if input.ModelBaseName != "" {
-		config.AutoGPTQ.ModelBaseName = input.ModelBaseName
-	}
-
-	if input.NegativePromptScale != 0 {
-		config.NegativePromptScale = input.NegativePromptScale
-	}
-
-	if input.UseFastTokenizer {
-		config.UseFastTokenizer = input.UseFastTokenizer
-	}
-
-	if input.NegativePrompt != "" {
-		config.NegativePrompt = input.NegativePrompt
-	}
-
-	if input.RopeFreqBase != 0 {
-		config.RopeFreqBase = input.RopeFreqBase
-	}
-
-	if input.RopeFreqScale != 0 {
-		config.RopeFreqScale = input.RopeFreqScale
-	}
-
-	if input.Grammar != "" {
-		config.Grammar = input.Grammar
-	}
-
-	if input.Temperature != nil {
-		config.Temperature = input.Temperature
-	}
-
-	if input.Maxtokens != nil {
-		config.Maxtokens = input.Maxtokens
-	}
-
-	switch stop := input.Stop.(type) {
-	case string:
-		if stop != "" {
-			config.StopWords = append(config.StopWords, stop)
-		}
-	case []interface{}:
-		for _, pp := range stop {
-			if s, ok := pp.(string); ok {
-				config.StopWords = append(config.StopWords, s)
-			}
-		}
-	}
-
-	if len(input.Tools) > 0 {
-		for _, tool := range input.Tools {
-			input.Functions = append(input.Functions, tool.Function)
-		}
-	}
-
-	if input.ToolsChoice != nil {
-		var toolChoice grammar.Tool
-
-		switch content := input.ToolsChoice.(type) {
-		case string:
-			_ = json.Unmarshal([]byte(content), &toolChoice)
-		case map[string]interface{}:
-			dat, _ := json.Marshal(content)
-			_ = json.Unmarshal(dat, &toolChoice)
-		}
-		input.FunctionCall = map[string]interface{}{
-			"name": toolChoice.Function.Name,
-		}
-	}
-
-	// Decode each request's message content
-	index := 0
-	for i, m := range input.Messages {
-		switch content := m.Content.(type) {
-		case string:
-			input.Messages[i].StringContent = content
-		case []interface{}:
-			dat, _ := json.Marshal(content)
-			c := []schema.Content{}
-			json.Unmarshal(dat, &c)
-			for _, pp := range c {
-				if pp.Type == "text" {
-					input.Messages[i].StringContent = pp.Text
-				} else if pp.Type == "image_url" {
-					// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
-					base64, err := getBase64Image(pp.ImageURL.URL)
-					if err == nil {
-						input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
-						// set a placeholder for each image
-						input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
-						index++
-					} else {
-						fmt.Print("Failed encoding image", err)
-					}
-				}
-			}
-		}
-	}
-
-	if input.RepeatPenalty != 0 {
-		config.RepeatPenalty = input.RepeatPenalty
-	}
-
-	if input.FrequencyPenalty != 0 {
-		config.FrequencyPenalty = input.FrequencyPenalty
-	}
-
-	if input.PresencePenalty != 0 {
-		config.PresencePenalty = input.PresencePenalty
-	}
-
-	if input.Keep != 0 {
-		config.Keep = input.Keep
-	}
-
-	if input.Batch != 0 {
-		config.Batch = input.Batch
-	}
-
-	if input.IgnoreEOS {
-		config.IgnoreEOS = input.IgnoreEOS
-	}
-
-	if input.Seed != nil {
-		config.Seed = input.Seed
-	}
-
-	if input.TypicalP != nil {
-		config.TypicalP = input.TypicalP
-	}
-
-	switch inputs := input.Input.(type) {
-	case string:
-		if inputs != "" {
-			config.InputStrings = append(config.InputStrings, inputs)
-		}
-	case []interface{}:
-		for _, pp := range inputs {
-			switch i := pp.(type) {
-			case string:
-				config.InputStrings = append(config.InputStrings, i)
-			case []interface{}:
-				tokens := []int{}
-				for _, ii := range i {
-					tokens = append(tokens, int(ii.(float64)))
-				}
-				config.InputToken = append(config.InputToken, tokens)
-			}
-		}
-	}
-
-	// Can be either a string or an object
-	switch fnc := input.FunctionCall.(type) {
-	case string:
-		if fnc != "" {
-			config.SetFunctionCallString(fnc)
-		}
-	case map[string]interface{}:
-		var name string
-		n, exists := fnc["name"]
-		if exists {
-			nn, e := n.(string)
-			if e {
-				name = nn
-			}
-		}
-		config.SetFunctionCallNameString(name)
-	}
-
-	switch p := input.Prompt.(type) {
-	case string:
-		config.PromptStrings = append(config.PromptStrings, p)
-	case []interface{}:
-		for _, pp := range p {
-			if s, ok := pp.(string); ok {
-				config.PromptStrings = append(config.PromptStrings, s)
-			}
-		}
-	}
-}
-
-func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
-	cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
-		config.LoadOptionDebug(debug),
-		config.LoadOptionThreads(threads),
-		config.LoadOptionContextSize(ctx),
-		config.LoadOptionF16(f16),
-	)
-
-	// Set the parameters for the language model prediction
-	updateRequestConfig(cfg, input)
-
-	return cfg, input, err
-}
diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index c7dd39e7..572cec12 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -9,8 +9,7 @@ import (
 	"path/filepath"
 
 	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
@@ -23,17 +22,15 @@
 // @Param file formData file true "file"
 // @Success 200 {object} map[string]string "Response"
 // @Router /v1/audio/transcriptions [post]
-func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readRequest(c, ml, appConfig, false)
+		_, request, err := fce.OpenAIRequestFromContext(c, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
+		// TODO: Investigate this file copy stuff later - potentially belongs in service.
+
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
 		if err != nil {
@@ -65,13 +62,16 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		log.Debug().Msgf("Audio file copied to: %+v", dst)
 
-		tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
-		if err != nil {
-			return err
-		}
+		request.File = dst
 
-		log.Debug().Msgf("Trascribed: %+v", tr)
+		responseChannel := tbs.Transcribe(request)
+		rawResponse := <-responseChannel
+
+		if rawResponse.Error != nil {
+			return rawResponse.Error
+		}
+		log.Debug().Msgf("Transcribed: %+v", rawResponse.Value)
 		// TODO: handle different outputs here
-		return c.Status(http.StatusOK).JSON(tr)
+		return c.Status(http.StatusOK).JSON(rawResponse.Value)
 	}
 }
diff --git a/core/schema/whisper.go b/core/schema/transcription.go
similarity index 90%
rename from core/schema/whisper.go
rename to core/schema/transcription.go
index 41413c1f..fe1799fa 100644
--- a/core/schema/whisper.go
+++ b/core/schema/transcription.go
@@ -10,7 +10,7 @@ type Segment struct {
 	Tokens []int `json:"tokens"`
 }
 
-type Result struct {
+type TranscriptionResult struct {
 	Segments []Segment `json:"segments"`
 	Text     string    `json:"text"`
 }
diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go
index 979a67a3..a610432c 100644
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -15,22 +15,22 @@ import (
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )
 
-type BackendMonitor struct {
+type BackendMonitorService struct {
 	configLoader *config.BackendConfigLoader
 	modelLoader  *model.ModelLoader
 	options      *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
 }
 
-func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
-	return BackendMonitor{
+func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService {
+	return &BackendMonitorService{
 		configLoader: configLoader,
 		modelLoader:  modelLoader,
 		options:      appConfig,
 	}
 }
 
-func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
-	config, exists := bm.configLoader.GetBackendConfig(modelName)
+func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) {
+	config, exists := bms.configLoader.GetBackendConfig(modelName)
 	var backendId string
 	if exists {
 		backendId = config.Model
@@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string
 	return backendId, nil
 }
 
-func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
-	config, exists := bm.configLoader.GetBackendConfig(model)
+func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
+	config, exists := bms.configLoader.GetBackendConfig(model)
 	var backend string
 	if exists {
 		backend = config.Model
@@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
 		backend = fmt.Sprintf("%s.bin", backend)
 	}
 
-	pid, err := bm.modelLoader.GetGRPCPID(backend)
+	pid, err := bms.modelLoader.GetGRPCPID(backend)
 
 	if err != nil {
 		log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
@@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
 	}, nil
 }
 
-func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
-	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
+	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
 	if err != nil {
 		return nil, err
 	}
-	modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
+	modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
 	if modelAddr == "" {
 		return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
 	}
@@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
 	status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
 	if rpcErr != nil {
 		log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
-		val, slbErr := bm.SampleLocalBackendProcess(backendId)
+		val, slbErr := bms.SampleLocalBackendProcess(backendId)
 		if slbErr != nil {
 			return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
 		}
@@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
 	return status, nil
 }
 
-func (bm BackendMonitor) ShutdownModel(modelName string) error {
-	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) ShutdownModel(modelName string) error {
+	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
 	if err != nil {
 		return err
 	}
-	return bm.modelLoader.ShutdownModel(backendId)
+	return bms.modelLoader.ShutdownModel(backendId)
 }
diff --git a/core/services/gallery.go b/core/services/gallery.go
index b068abbb..1ef8e3e2 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -3,14 +3,18 @@ package services
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"os"
+	"path/filepath"
 	"strings"
 	"sync"
 
 	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/embedded"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
 
@@ -29,18 +33,6 @@ func NewGalleryService(modelPath string) *GalleryService {
 	}
 }
 
-func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error {
-
-	config, err := gallery.GetGalleryConfigFromURL(req.URL)
-	if err != nil {
-		return err
-	}
-
-	config.Files = append(config.Files, req.AdditionalFiles...)
-
-	return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
-}
-
 func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) {
 	g.Lock()
 	defer g.Unlock()
@@ -92,10 +84,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
 				err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
 			}
 		} else if op.ConfigURL != "" {
-			startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
+			PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
 			err = cl.Preload(g.modelPath)
 		} else {
-			err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
+			err = prepareModel(g.modelPath, op.Req, progressCallback)
 		}
 
 		if err != nil {
@@ -127,13 +119,12 @@ type galleryModel struct {
 	ID string `json:"id"`
 }
 
-func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
+func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error {
 	var err error
 	for _, r := range requests {
 		utils.ResetDownloadTimers()
 		if r.ID == "" {
-			err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
-
+			err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
 		} else {
 			if strings.Contains(r.ID, "@") {
 				err = gallery.InstallModelFromGallery(
@@ -158,7 +149,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g
 		return err
 	}
 
-	return processRequests(modelPath, s, cl, galleries, requests)
+	return processRequests(modelPath, galleries, requests)
 }
 
 func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
@@ -168,5 +159,90 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader,
 		return err
 	}
 
-	return processRequests(modelPath, s, cl, galleries, requests)
+	return processRequests(modelPath, galleries, requests)
+}
+
+// PreloadModelsConfigurations will preload models from the given list of URLs
+// It will download the model if it is not already present in the model path
+// It will also try to resolve if the model is an embedded model YAML configuration
+func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) {
+	for _, url := range models {
+
+		// As a best effort, try to resolve the model from the remote library
+		// if it's not resolved we try with the other method below
+		if modelLibraryURL != "" {
+			lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+			if err == nil {
+				if lib[url] != "" {
+					log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+					url = lib[url]
+				}
+			}
+		}
+
+		url = embedded.ModelShortURL(url)
+		switch {
+		case embedded.ExistsInModelsLibrary(url):
+			modelYAML, err := embedded.ResolveContent(url)
+			// If we resolve something, just save it to disk and continue
+			if err != nil {
+				log.Error().Err(err).Msg("error resolving model content")
+				continue
+			}
+
+			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+			md5Name := utils.MD5(url)
+			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+			}
+		case downloader.LooksLikeURL(url):
+			log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+			// md5 of model name
+			md5Name := utils.MD5(url)
+
+			// check if file exists
+			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+					utils.DisplayDownloadFunction(fileName, current, total, percent)
+				})
+				if err != nil {
+					log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+				}
+			}
+		default:
+			if _, err := os.Stat(url); err == nil {
+				log.Debug().Msgf("[startup] resolved local model: %s", url)
+				// copy to modelPath
+				md5Name := utils.MD5(url)
+
+				modelYAML, err := os.ReadFile(url)
+				if err != nil {
+					log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+					continue
+				}
+
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+					log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+				}
+			} else {
+				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+			}
+		}
+	}
+}
+
+func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error {
+
+	config, err := gallery.GetGalleryConfigFromURL(req.URL)
+	if err != nil {
+		return err
+	}
+
+	config.Files = append(config.Files, req.AdditionalFiles...)
+
+	return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
+}
diff --git a/core/services/list_models.go b/core/services/list_models.go
new file mode 100644
index 00000000..a21e6faf
--- /dev/null
+++ b/core/services/list_models.go
@@ -0,0 +1,72 @@
+package services
+
+import (
+	"regexp"
+
+	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/go-skynet/LocalAI/pkg/model"
+)
+
+type ListModelsService struct {
+	bcl       *config.BackendConfigLoader
+	ml        *model.ModelLoader
+	appConfig *config.ApplicationConfig
+}
+
+func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService {
+	return &ListModelsService{
+		bcl:       bcl,
+		ml:        ml,
+		appConfig: appConfig,
+	}
+}
+
+func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
+
+	models, err := lms.ml.ListModels()
+	if err != nil {
+		return nil, err
+	}
+
+	var mm map[string]interface{} = map[string]interface{}{}
+
+	dataModels := []schema.OpenAIModel{}
+
+	var filterFn func(name string) bool
+
+	// If filter is not specified, do not filter the list by model name
+	if filter == "" {
+		filterFn = func(_ string) bool { return true }
+	} else {
+		// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
+		rxp, err := regexp.Compile(filter)
+		if err != nil {
+			return nil, err
+		}
+		filterFn = func(name string) bool {
+			return rxp.MatchString(name)
+		}
+	}
+
+	// Start with the known configurations
+	for _, c := range lms.bcl.GetAllBackendConfigs() {
+		if excludeConfigured {
+			mm[c.Model] = nil
+		}
+
+		if filterFn(c.Name) {
+			dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
+		}
+	}
+
+	// Then iterate through the loose files:
+	for _, m := range models {
+		// And only add them if they shouldn't be skipped.
+		if _, exists := mm[m]; !exists && filterFn(m) {
+			dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
+		}
+	}
+
+	return dataModels, nil
+}
diff --git a/pkg/startup/model_preload_test.go b/core/services/model_preload_test.go
similarity index 96%
rename from pkg/startup/model_preload_test.go
rename to core/services/model_preload_test.go
index 63a8f8b0..fc65d565 100644
--- a/pkg/startup/model_preload_test.go
+++ b/core/services/model_preload_test.go
@@ -1,13 +1,14 @@
-package startup_test
+package services_test
 
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 
-	. "github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 
+	. "github.com/go-skynet/LocalAI/core/services"
+
 	. "github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega" ) diff --git a/core/services/openai.go b/core/services/openai.go new file mode 100644 index 00000000..0f61d6f4 --- /dev/null +++ b/core/services/openai.go @@ -0,0 +1,805 @@ +package services + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + "sync" + "time" + + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/concurrency" + "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/google/uuid" + "github.com/imdario/mergo" + "github.com/rs/zerolog/log" +) + +type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration + +type endpointConfiguration struct { + SchemaObject string + TemplatePath string + TemplateData model.PromptTemplateData + ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice + CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] + TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] +} + +// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later. +func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { + if resp.Error != nil || resp.Value == nil { + return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} + } + return concurrency.ErrorOr[*schema.OpenAIResponse]{ + Value: &schema.OpenAIResponse{ + Choices: []schema.Choice{ + { + Text: resp.Value.Response, + }, + }, + Usage: schema.OpenAIUsage{ + PromptTokens: resp.Value.Usage.Prompt, + CompletionTokens: resp.Value.Usage.Completion, + TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, + }, + }, + } +} + +// TODO: Consider alternative names for this. +// The purpose of this struct is to hold a reference to the OpenAI request context information +// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it +type OpenAIRequestTraceID struct { + ID string + Created int +} + +// This type split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code from the OpenAI specific higher level functionality +type OpenAIService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig + llmbs *backend.LLMBackendService +} + +func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService { + return &OpenAIService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + llmbs: llmbs, + } +} + +// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE??? +func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) { + return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig) +} + +// TODO: It would be a lot less messy to make a return struct that had references to each of these channels +// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()? 
+
+// finalResultsChannel is the primary async return path: one result for the entire request.
+// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing for testing? One bundle of LLMResponseBundle per PromptString? Gets all N completions for a single prompt.
+// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request" Request is available to attempt tracing???
+// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens!
+func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+	traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {

+	return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
+		return endpointConfiguration{
+			SchemaObject: "text_completion",
+			TemplatePath: bc.TemplateConfig.Completion,
+			TemplateData: model.PromptTemplateData{
+				SystemPrompt: bc.SystemPrompt,
+			},
+			ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
+				return schema.Choice{
+					Index:        promptIndex,
+					FinishReason: "stop",
+					Text:         resp.Response,
+				}
+			},
+			CompletionMappingFn: simpleMapper,
+			TokenMappingFn:      simpleMapper,
+		}
+	}, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+	traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+	return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration {
+
+		return endpointConfiguration{
+			SchemaObject: "edit",
+			TemplatePath: bc.TemplateConfig.Edit,
+			TemplateData: model.PromptTemplateData{
+				SystemPrompt: bc.SystemPrompt,
+				Instruction:  request.Instruction,
+			},
+			ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice {
+				return schema.Choice{
+					Index:        promptIndex,
+					FinishReason: "stop",
+					Text:         resp.Response,
+				}
+			},
+			CompletionMappingFn: simpleMapper,
+			TokenMappingFn:      simpleMapper,
+		}
+	}, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) (
+	traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+	return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil)
+}
+
+func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
+	traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+	if initialTraceID == nil {
+		traceID = &OpenAIRequestTraceID{
+			ID:      uuid.New().String(),
+			Created: int(time.Now().Unix()),
+		}
+	} else {
+		traceID = initialTraceID
+	}
+
+	bc, request, err := oais.getConfig(request)
+	if err != nil {
+		log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err)
+		return
+	}
+
+	if request.ResponseFormat.Type == "json_object" {
+		request.Grammar = grammar.JSONBNF
+	}
+
+	bc.Grammar = request.Grammar
+
+	if request.Stream && len(bc.PromptStrings) > 1 {
+		log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?")
+	}
+
+	rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	finalResultChannel = rawFinalResultChannel
+	promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{}
+	var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	if notifyOnPromptResult {
+		rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+	if notifyOnToken {
+		rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+
+	promptResultsChannelLock := sync.Mutex{}
+
+	endpointConfig := endpointConfigFn(bc, request)
+
+	if len(endpointConfig.TemplatePath) == 0 {
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+			endpointConfig.TemplatePath = bc.Model
+		} else {
+			log.Warn().Msgf("failed to find any template for %+v", request)
+		}
+	}
+
+	setupWG := sync.WaitGroup{}
+	var prompts []string
+	if lPS := len(bc.PromptStrings); lPS > 0 {
+		setupWG.Add(lPS)
+		prompts = bc.PromptStrings
+	} else {
+		setupWG.Add(len(bc.InputStrings))
+		prompts = bc.InputStrings
+	}
+
+	var setupError error = nil
+
+	for pI, p := range prompts {
+
+		go func(promptIndex int, prompt string) {
+			if endpointConfig.TemplatePath != "" {
+				promptTemplateData := model.PromptTemplateData{
+					Input: prompt,
+				}
+				// mergo requires a pointer destination; merge the endpoint's template data over the defaults
+				err := mergo.Merge(&promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride)
+				if err == nil {
+					templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData)
+					if err == nil {
+						prompt = templatedInput
+						log.Debug().Msgf("Template found, input modified to: %s", prompt)
+					}
+				}
+			}
+
+			log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt)
+			promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc,
+				func(r *backend.LLMResponse) schema.Choice {
+					return endpointConfig.ResultMappingFn(r, promptIndex)
+				}, notifyOnPromptResult, notifyOnToken)
+			if err != nil {
+				log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err)
+				promptResultsChannelLock.Lock()
+				setupError = errors.Join(setupError, err)
+				promptResultsChannelLock.Unlock()
+				setupWG.Done()
+				return
+			}
+			if notifyOnPromptResult {
+				concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true)
+			}
+			if notifyOnToken {
+				concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true)
+			}
+			promptResultsChannelLock.Lock()
+			promptResultsChannels = append(promptResultsChannels, promptResultsChannel)
+			promptResultsChannelLock.Unlock()
+			setupWG.Done()
+		}(pI, p)
+
+	}
+	setupWG.Wait()
+
+	// If any of the setup goroutines experienced an error, quit early here.
+	if setupError != nil {
+		go func() {
+			log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError)
+			rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError}
+			close(rawFinalResultChannel)
+		}()
+		return
+	}
+
+	initialResponse := &schema.OpenAIResponse{
+		ID:      traceID.ID,
+		Created: traceID.Created,
+		Model:   request.Model,
+		Object:  endpointConfig.SchemaObject,
+		Usage:   schema.OpenAIUsage{},
+	}
+
+	// utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) {
+	concurrency.SliceOfChannelsReducer(
+		promptResultsChannels, rawFinalResultChannel,
+		func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {

+			if iv.Error != nil {
+				result.Error = iv.Error
+				return result
+			}
+			result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt
+			result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion
+			result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens
+
+			result.Value.Choices = append(result.Value.Choices, iv.Value.Response...)
+
+			return result
+		}, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true)
+
+	completionsChannel = rawCompletionsChannel
+	tokenChannel = rawTokenChannel
+
+	return
+}
+
+// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes
+// this is not a final decision -- just a reality of moving a lot of parts at once
+// This has _become_ Chat which wasn't the goal... More cleanup in the future once it's stable?
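+//
+// A rough consumption sketch for the Chat wrapper that delegates here (illustrative
+// only; the real HTTP wiring lives in core/http/endpoints/openai and may differ):
+//
+//	_, finalCh, _, tokenCh, err := oais.Chat(request, false, request.Stream)
+//	if err != nil { /* handle */ }
+//	if request.Stream {
+//		for chunk := range tokenCh { /* write one SSE frame per chunk.Value */ }
+//	} else {
+//		final := <-finalCh // aggregated response, or final.Error
+//		_ = final
+//	}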
+func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) (
+	traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse],
+	completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) {
+
+	if initialTraceID == nil {
+		traceID = &OpenAIRequestTraceID{
+			ID:      uuid.New().String(),
+			Created: int(time.Now().Unix()),
+		}
+	} else {
+		traceID = initialTraceID
+	}
+
+	bc, request, err := oais.getConfig(request)
+	if err != nil {
+		return
+	}
+
+	// Allow the user to set custom actions via config file
+	// to be "embedded" in each model
+	noActionName := "answer"
+	noActionDescription := "use this action to answer without performing any action"
+
+	if bc.FunctionsConfig.NoActionFunctionName != "" {
+		noActionName = bc.FunctionsConfig.NoActionFunctionName
+	}
+	if bc.FunctionsConfig.NoActionDescriptionName != "" {
+		noActionDescription = bc.FunctionsConfig.NoActionDescriptionName
+	}
+
+	if request.ResponseFormat.Type == "json_object" {
+		request.Grammar = grammar.JSONBNF
+	}
+
+	bc.Grammar = request.Grammar
+
+	processFunctions := false
+	funcs := grammar.Functions{}
+	// process functions if we have any defined or if we have a function call string
+	if len(request.Functions) > 0 && bc.ShouldUseFunctions() {
+		log.Debug().Msgf("Response needs to process functions")
+
+		processFunctions = true
+
+		noActionGrammar := grammar.Function{
+			Name:        noActionName,
+			Description: noActionDescription,
+			Parameters: map[string]interface{}{
+				"properties": map[string]interface{}{
+					"message": map[string]interface{}{
+						"type":        "string",
+						"description": "The message to reply the user with",
+					}},
+			},
+		}
+
+		// Append the no action function
+		funcs = append(funcs, request.Functions...)
+		if !bc.FunctionsConfig.DisableNoAction {
+			funcs = append(funcs, noActionGrammar)
+		}
+
+		// Force picking one of the functions by the request
+		if bc.FunctionToCall() != "" {
+			funcs = funcs.Select(bc.FunctionToCall())
+		}
+
+		// Update input grammar
+		jsStruct := funcs.ToJSONStructure()
+		bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls)
+	} else if request.JSONFunctionGrammarObject != nil {
+		bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls)
+	}
+
+	if request.Stream && processFunctions {
+		log.Warn().Msg("Streaming + Functions is highly experimental in this version")
+	}
+
+	var predInput string
+
+	if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions {
+
+		suppressConfigSystemPrompt := false
+		mess := []string{}
+		for messageIndex, i := range request.Messages {
+			var content string
+			role := i.Role
+
+			// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
+			// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
+			if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
+				roleFn := "assistant_function_call"
+				r := bc.Roles[roleFn]
+				if r != "" {
+					role = roleFn
+				}
+			}
+			r := bc.Roles[role]
+			contentExists := i.Content != nil && i.StringContent != ""
+
+			fcall := i.FunctionCall
+			if len(i.ToolCalls) > 0 {
+				fcall = i.ToolCalls
+			}
+
+			// First attempt to populate content via a chat message specific template
+			if bc.TemplateConfig.ChatMessage != "" {
+				chatMessageData := model.ChatMessageTemplateData{
+					SystemPrompt: bc.SystemPrompt,
+					Role:         r,
+					RoleName:     role,
+					Content:      i.StringContent,
+					FunctionCall: fcall,
+					FunctionName: i.Name,
+					LastMessage:  messageIndex == (len(request.Messages) - 1),
+					Function:     bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)),
+					MessageIndex: messageIndex,
+				}
+				templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData)
+				if err != nil {
+					log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err)
+				} else {
+					if templatedChatMessage == "" {
+						log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData)
+						continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
+					}
+					log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
+					content = templatedChatMessage
+				}
+			}
+			marshalAnyRole := func(f any) {
+				j, err := json.Marshal(f)
+				if err == nil {
+					if contentExists {
+						content += "\n" + fmt.Sprint(r, " ", string(j))
+					} else {
+						content = fmt.Sprint(r, " ", string(j))
+					}
+				}
+			}
+			marshalAny := func(f any) {
+				j, err := json.Marshal(f)
+				if err == nil {
+					if contentExists {
+						content += "\n" + string(j)
+					} else {
+						content = string(j)
+					}
+				}
+			}
+			// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
+			if content == "" {
+				if r != "" {
+					if contentExists {
+						content = fmt.Sprint(r, i.StringContent)
+					}
+
+					if i.FunctionCall != nil {
+						marshalAnyRole(i.FunctionCall)
+					}
+				} else {
+					if contentExists {
+						content = fmt.Sprint(i.StringContent)
+					}
+
+					if i.FunctionCall != nil {
+						marshalAny(i.FunctionCall)
+					}
+
+					if i.ToolCalls != nil {
+						marshalAny(i.ToolCalls)
+					}
+				}
+				// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+				if contentExists && role == "system" {
+					suppressConfigSystemPrompt = true
+				}
+			}
+
+			mess = append(mess, content)
+		}
+
+		predInput = strings.Join(mess, "\n")
+
+		log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+		templateFile := ""
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) {
+			templateFile = bc.Model
+		}
+
+		if bc.TemplateConfig.Chat != "" && !processFunctions {
+			templateFile = bc.TemplateConfig.Chat
+		}
+
+		if bc.TemplateConfig.Functions != "" && processFunctions {
+			templateFile = bc.TemplateConfig.Functions
+		}
+
+		if templateFile != "" {
+			templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+				SystemPrompt:         bc.SystemPrompt,
+				SuppressSystemPrompt: suppressConfigSystemPrompt,
+				Input:                predInput,
+				Functions:            funcs,
+			})
+			if err == nil {
+				predInput = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", predInput)
+			} else {
+				log.Debug().Msgf("Template failed loading: %s", err.Error())
+			}
+		}
+	}
+	log.Debug().Msgf("Prompt (after templating): %s", predInput)
+	if processFunctions {
+		log.Debug().Msgf("Grammar: %+v", bc.Grammar)
+	}
+
+	rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse]
+	if notifyOnPromptResult {
+		rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+	if notifyOnToken {
+		rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
+	}
+
+	rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice {
+		return schema.Choice{
+			Index:        0, // ???
+			FinishReason: "stop",
+			Message: &schema.Message{
+				Role:    "assistant",
+				Content: resp.Response,
+			},
+		}
+	}, notifyOnPromptResult, notifyOnToken)
+
+	chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] {
+		if resp.Error != nil || resp.Value == nil {
+			return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error}
+		}
+		return concurrency.ErrorOr[*schema.OpenAIResponse]{
+			Value: &schema.OpenAIResponse{
+				ID:      traceID.ID,
+				Created: traceID.Created,
+				Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+				Choices: []schema.Choice{
+					{
+						Delta: &schema.Message{
+							Role:    "assistant",
+							Content: resp.Value.Response,
+						},
+						Index: 0,
+					},
+				},
+				Object: "chat.completion.chunk",
+				Usage: schema.OpenAIUsage{
+					PromptTokens:     resp.Value.Usage.Prompt,
+					CompletionTokens: resp.Value.Usage.Completion,
+					TotalTokens:      resp.Value.Usage.Prompt + resp.Value.Usage.Completion,
+				},
+			},
+		}
+	}
+
+	if notifyOnPromptResult {
+		concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true)
+	}
+	if notifyOnToken {
+		concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true)
+	}
+
+	go func() {
+		rawResult := <-rawResultChannel
+		if rawResult.Error != nil {
+			log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error)
+			return
+		}
+		llmResponseChoices := rawResult.Value.Response
+
+		if processFunctions && len(llmResponseChoices) > 1 {
+			log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices))
+			log.Debug().Msgf("%+v", llmResponseChoices)
+		}
+
+		for _, result := range rawResult.Value.Response {
+			// If no functions, just return the raw result.
+			if !processFunctions {
+
+				resp := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{result},
+					Object:  "chat.completion.chunk",
+					Usage: schema.OpenAIUsage{
+						PromptTokens:     rawResult.Value.Usage.Prompt,
+						CompletionTokens: rawResult.Value.Usage.Completion,
+						TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+					},
+				}
+
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+				continue
+			}
+			// At this point, things are function specific!
+
+			// Oh no this can't be the right way to do this... but it works. Save us, mudler!
+			fString := fmt.Sprintf("%s", result.Message.Content)
+			results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls)
+			noActionToRun := (len(results) > 0 && results[0].name == noActionName)
+
+			if noActionToRun {
+				log.Debug().Msg("-- noActionToRun branch --")
+				initialMessage := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}},
+					Object:  "stop",
+				}
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+
+				result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput)
+				if err != nil {
+					log.Error().Msgf("error handling question: %s", err.Error())
+					return
+				}
+
+				resp := schema.OpenAIResponse{
+					ID:      traceID.ID,
+					Created: traceID.Created,
+					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+					Object:  "chat.completion.chunk",
+					Usage: schema.OpenAIUsage{
+						PromptTokens:     rawResult.Value.Usage.Prompt,
+						CompletionTokens: rawResult.Value.Usage.Completion,
+						TotalTokens:      rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion,
+					},
+				}
+
+				rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp}
+
+			} else {
+				log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results)
+				for i, ss := range results {
+					name, args := ss.name, ss.arguments
+
+					initialMessage := schema.OpenAIResponse{
+						ID:      traceID.ID,
+						Created: traceID.Created,
+						Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
+						Choices: []schema.Choice{{
+							FinishReason: "function_call",
+							Message: &schema.Message{
+								Role: "assistant",
+								ToolCalls: []schema.ToolCall{
+									{
+										Index: i,
+										ID:    traceID.ID,
+										Type:  "function",
+										FunctionCall: schema.FunctionCall{
+											Name:      name,
+											Arguments: args,
+										},
+									},
+								},
+							}}},
+						Object: "chat.completion.chunk",
+					}
+					rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage}
+				}
+			}
+		}
+
+		close(rawFinalResultChannel)
+	}()
+
+	finalResultChannel = rawFinalResultChannel
+	completionsChannel = rawCompletionsChannel
+	tokenChannel = rawTokenChannel
+	return
+}
+
+func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) {
+	log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply")
+
+	// If there is a message that the LLM already sends as part of the JSON reply, use it
+	arguments := map[string]interface{}{}
+	json.Unmarshal([]byte(args), &arguments)
+	m, exists := arguments["message"]
+	if exists {
+		switch message := m.(type) {
+		case string:
+			if message != "" {
+				log.Debug().Msgf("Reply received from LLM: %s", message)
+				message = oais.llmbs.Finetune(*config, prompt, message)
+				log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
+
+				return message, nil
+			}
+		}
+	}
+
+	log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
+	// Otherwise ask the LLM to understand the JSON output and the context, and return a message
+	// Note: This costs (in terms of CPU/GPU) another computation
+	config.Grammar = ""
+	images := []string{}
+	for _, m := range input.Messages {
+		images = append(images, m.StringImages...)
+	}
+
+	resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{
+		Text:        prompt,
+		Images:      images,
+		RawMessages: input.Messages, // Experimental
+	}, config, false)
+
+	if err != nil {
+		log.Error().Msgf("inference setup error: %s", err.Error())
+		return "", err
+	}
+
+	raw := <-resultChannel
+	if raw.Error != nil {
+		log.Error().Msgf("inference error: %q", raw.Error.Error())
+		return "", raw.Error
+	}
+	if raw.Value == nil {
+		log.Warn().Msgf("nil inference response")
+		return "", nil
+	}
+	return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil
+}
+
+type funcCallResults struct {
+	name      string
+	arguments string
+}
+
+func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
+
+	results := []funcCallResults{}
+
+	// TODO: use generics to avoid this code duplication
+	if multipleResults {
+		ss := []map[string]interface{}{}
+		s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(s), &ss)
+
+		for _, s := range ss {
+			func_name, ok := s["function"]
+			if !ok {
+				continue
+			}
+			args, ok := s["arguments"]
+			if !ok {
+				continue
+			}
+			d, _ := json.Marshal(args)
+			funcName, ok := func_name.(string)
+			if !ok {
+				continue
+			}
+			results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+		}
+	} else {
+		// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
+		ss := map[string]interface{}{}
+		// This prevents newlines from breaking JSON parsing for clients
+		// s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(llmresult), &ss)
+
+		// The grammar defines the function name as "function", while OpenAI returns "name"
+		func_name, ok := ss["function"]
+		if !ok {
+			log.Debug().Msg("ss[function] is not OK!")
+			return results
+		}
+		// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+		args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+		if !ok {
+			log.Debug().Msg("ss[arguments] is not OK!")
+			return results
+		}
+		d, _ := json.Marshal(args)
+		funcName, ok := func_name.(string)
+		if !ok {
+			log.Debug().Msgf("unexpected func_name: %+v", func_name)
+			return results
+		}
+		results = append(results, funcCallResults{name: funcName, arguments: string(d)})
+	}
+	return results
+}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 6298f034..92ccaa9d 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -4,17 +4,21 @@ import (
 	"fmt"
 	"os"
 
+	"github.com/go-skynet/LocalAI/core"
+	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
+	openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up.
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/internal"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
+	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
 
-func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+// (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+func Startup(opts ...config.AppOption) (*core.Application, error) {
 	options := config.NewApplicationConfig(opts...)
 	zerolog.SetGlobalLevel(zerolog.InfoLevel)
@@ -27,68 +31,75 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	// Make sure directories exists
 	if options.ModelPath == "" {
-		return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
+		return nil, fmt.Errorf("options.ModelPath cannot be empty")
 	}
 	err := os.MkdirAll(options.ModelPath, 0755)
 	if err != nil {
-		return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
+		return nil, fmt.Errorf("unable to create ModelPath: %q", err)
 	}
 	if options.ImageDir != "" {
 		err := os.MkdirAll(options.ImageDir, 0755)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
+			return nil, fmt.Errorf("unable to create ImageDir: %q", err)
 		}
 	}
 	if options.AudioDir != "" {
 		err := os.MkdirAll(options.AudioDir, 0755)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
+			return nil, fmt.Errorf("unable to create AudioDir: %q", err)
 		}
 	}
 	if options.UploadDir != "" {
 		err := os.MkdirAll(options.UploadDir, 0755)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
+			return nil, fmt.Errorf("unable to create UploadDir: %q", err)
+		}
+	}
+	if options.ConfigsDir != "" {
+		err := os.MkdirAll(options.ConfigsDir, 0755)
+		if err != nil {
+			return nil, fmt.Errorf("unable to create ConfigsDir: %q", err)
 		}
 	}
 
-	//
-	pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
+	// Load config jsons
+	utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles)
+	utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants)
+	utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles)
 
-	cl := config.NewBackendConfigLoader()
-	ml := model.NewModelLoader(options.ModelPath)
+	app := createApplication(options)
 
-	configLoaderOpts := options.ToConfigLoaderOptions()
+	services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
 
-	if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
+	if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
 		log.Error().Err(err).Msg("error loading config files")
 	}
 
 	if options.ConfigFile != "" {
-		if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
+		if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil {
 			log.Error().Err(err).Msg("error loading config file")
 		}
 	}
 
-	if err := cl.Preload(options.ModelPath); err != nil {
+	if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil {
 		log.Error().Err(err).Msg("error downloading models")
 	}
 
 	if options.PreloadJSONModels != "" {
-		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
-			return nil, nil, nil, err
+		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil {
+			return nil, err
 		}
 	}
 
 	if options.PreloadModelsFromPath != "" {
-		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
-			return nil, nil, nil, err
+		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil {
+			return nil, err
 		}
 	}
 
 	if options.Debug {
-		for _, v := range cl.ListBackendConfigs() {
-			cfg, _ := cl.GetBackendConfig(v)
+		for _, v := range app.BackendConfigLoader.ListBackendConfigs() {
+			cfg, _ := app.BackendConfigLoader.GetBackendConfig(v)
 			log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
 		}
 	}
@@ -106,17 +117,17 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	go func() {
 		<-options.Context.Done()
 		log.Debug().Msgf("Context canceled, shutting down")
-		ml.StopAllGRPC()
+		app.ModelLoader.StopAllGRPC()
 	}()
 
 	if options.WatchDog {
 		wd := model.NewWatchDog(
-			ml,
+			app.ModelLoader,
 			options.WatchDogBusyTimeout,
 			options.WatchDogIdleTimeout,
 			options.WatchDogBusy,
 			options.WatchDogIdle)
-		ml.SetWatchDog(wd)
+		app.ModelLoader.SetWatchDog(wd)
 		go wd.Run()
 		go func() {
 			<-options.Context.Done()
@@ -126,5 +137,35 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	}
 
 	log.Info().Msg("core/startup process completed!")
-	return cl, ml, options, nil
+	return app, nil
+}
+
+// In lieu of a proper DI framework, this function wires up the Application manually.
+// This is in core/startup rather than core/state.go to keep package references clean!
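+//
+// For callers, the net effect of the Startup change above is one return value instead
+// of three; a minimal sketch of the new call site (error handling elided):
+//
+//	app, err := startup.Startup(opts...)
+//	if err != nil { /* handle */ }
+//	models, _ := app.ListModelsService.ListModels("", true) // no filter; skip configured loose files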
+func createApplication(appConfig *config.ApplicationConfig) *core.Application {
+	app := &core.Application{
+		ApplicationConfig:   appConfig,
+		BackendConfigLoader: config.NewBackendConfigLoader(),
+		ModelLoader:         model.NewModelLoader(appConfig.ModelPath),
+	}
+
+	var err error
+
+	app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+
+	app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath)
+	app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
+	app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
+
+	app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
+	if err != nil {
+		log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service")
+	}
+
+	return app
+}
diff --git a/core/state.go b/core/state.go
new file mode 100644
index 00000000..cf0d614b
--- /dev/null
+++ b/core/state.go
@@ -0,0 +1,41 @@
+package core
+
+import (
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+)
+
+// TODO: Can I come up with a better name or location for this?
+// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
+// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
+type Application struct {
+
+	// Application-Level Config
+	ApplicationConfig *config.ApplicationConfig
+	// ApplicationState *ApplicationState
+
+	// Core Low-Level Services
+	BackendConfigLoader *config.BackendConfigLoader
+	ModelLoader         *model.ModelLoader
+
+	// Backend Services
+	EmbeddingsBackendService      *backend.EmbeddingsBackendService
+	ImageGenerationBackendService *backend.ImageGenerationBackendService
+	LLMBackendService             *backend.LLMBackendService
+	TranscriptionBackendService   *backend.TranscriptionBackendService
+	TextToSpeechBackendService    *backend.TextToSpeechBackendService
+
+	// LocalAI System Services
+	BackendMonitorService *services.BackendMonitorService
+	GalleryService        *services.GalleryService
+	ListModelsService     *services.ListModelsService
+	LocalAIMetricsService *services.LocalAIMetricsService
+	OpenAIService         *services.OpenAIService
+}
+
+// TODO [NEXT PR?]: Break up ApplicationConfig.
+// Migrate over stuff that is not set via config at all - especially runtime stuff
+type ApplicationState struct {
+}
diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
new file mode 100644
index 00000000..c33bafe1
--- /dev/null
+++ b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru
@@ -0,0 +1,25 @@
+meta {
+  name: -completions Stream
+  type: http
+  seq: 4
+}
+
+post {
+  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
+  body: json
+  auth: none
+}
+
+headers {
+  Content-Type: application/json
+}
+
+body:json {
+  {
+    "model": "{{DEFAULT_MODEL}}",
+    "prompt": "function downloadFile(string url, string outputPath) {",
+    "max_tokens": 256,
+    "temperature": 0.5,
+    "stream": true
+  }
+}
diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go
new file mode 100644
index 00000000..324e8cc5
--- /dev/null
+++ b/pkg/concurrency/concurrency.go
@@ -0,0 +1,135 @@
+package concurrency
+
+import (
+	"sync"
+)
+
+// TODO: closeWhenDone bool parameter ::
+// It currently is experimental, and therefore exists.
+// Is there ever a situation to use false?
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type.
+// mappingFn allows the caller to convert from the input type to the output type
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup {
+	var wg sync.WaitGroup
+	wg.Add(len(individualResultChannels))
+	mergingFn := func(c <-chan IndividualResultType) {
+		for r := range c {
+			mr, err := mappingFn(r)
+			if err == nil {
+				outputChannel <- mr
+			}
+		}
+		wg.Done()
+	}
+	for _, irc := range individualResultChannels {
+		go mergingFn(irc)
+	}
+	if closeWhenDone {
+		go func() {
+			wg.Wait()
+			close(outputChannel)
+		}()
+	}
+
+	return &wg
+}
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE.
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use.
+// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes.
+func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup {
+	return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone)
+}
+
+// This function is used to merge the results of a slice of channels of a specific result type down to a single success result channel of a second type, and an error channel
+// mappingFn allows the caller to convert from the input type to the output type
+// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures.
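+// ErrorOr[T] (defined in pkg/concurrency/types.go below) simply pairs a Value with an optional Error.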
+// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { + var wg sync.WaitGroup + wg.Add(len(individualResultChannels)) + mergingFn := func(c <-chan ErrorOr[IndividualResultType]) { + for r := range c { + if r.Error != nil { + errorChannel <- r.Error + } else { + mv, err := mappingFn(r.Value) + if err != nil { + errorChannel <- err + } else { + successChannel <- mv + } + } + } + wg.Done() + } + for _, irc := range individualResultChannels { + go mergingFn(irc) + } + if closeWhenDone { + go func() { + wg.Wait() + close(successChannel) + close(errorChannel) + }() + } + return &wg +} + +// This function is used to reduce down the results of a slice of channels of a specific result type down to a single result value of a second type. +// reducerFn allows the caller to convert from the input type to the output type +// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. +// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. +func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType, + reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) { + wg = &sync.WaitGroup{} + wg.Add(len(individualResultsChannels)) + reduceLock := sync.Mutex{} + reducingFn := func(c <-chan InputResultType) { + for iv := range c { + reduceLock.Lock() + initialValue = reducerFn(iv, initialValue) + reduceLock.Unlock() + } + wg.Done() + } + for _, irc := range individualResultsChannels { + go reducingFn(irc) + } + go func() { + wg.Wait() + outputChannel <- initialValue + if closeWhenDone { + close(outputChannel) + } + }() + return wg +} + +// This function is primarily designed to be used in combination with the above utility functions. +// A slice of input result channels of a specific type is provided, along with a function to map those values to another type +// A slice of output result channels is returned, where each value is mapped as it comes in. +// The order of the slice will be retained. 
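+//
+// A minimal usage sketch (the input channels and formatting below are illustrative only):
+//
+//	labels := SliceOfChannelsTransformer(intChannels, func(v int) string {
+//		return fmt.Sprintf("$%d", v)
+//	})
+//	// labels[i] emits the mapped values of intChannels[i], preserving per-channel order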
+func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChannels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) {
+	rawOutputChannels := make([]<-chan OutputResultType, len(inputChannels))
+
+	transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) {
+		for iv := range ic {
+			oc <- mappingFn(iv)
+		}
+		close(oc)
+	}
+
+	for ci, c := range inputChannels {
+		roc := make(chan OutputResultType)
+		go transformingFn(c, roc)
+		rawOutputChannels[ci] = roc
+	}
+
+	outputChannels = rawOutputChannels
+	return
+}
diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go
new file mode 100644
index 00000000..fedd74be
--- /dev/null
+++ b/pkg/concurrency/concurrency_test.go
@@ -0,0 +1,101 @@
+package concurrency_test
+
+// TODO: normally, these go in utils_tests, right? Why does this cause problems only in pkg/utils?
+
+import (
+	"fmt"
+	"slices"
+
+	. "github.com/go-skynet/LocalAI/pkg/concurrency"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("utils/concurrency tests", func() {
+	It("SliceOfChannelsReducer works", func() {
+		individualResultsChannels := []<-chan int{}
+		initialValue := 0
+		for i := 0; i < 3; i++ {
+			c := make(chan int)
+			go func(i int, c chan int) {
+				for ii := 1; ii < 4; ii++ {
+					c <- (i * ii)
+				}
+				close(c)
+			}(i, c)
+			individualResultsChannels = append(individualResultsChannels, c)
+		}
+		Expect(len(individualResultsChannels)).To(Equal(3))
+		finalResultChannel := make(chan int)
+		wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int {
+			return val + input
+		}, initialValue, true)
+
+		Expect(wg).ToNot(BeNil())
+
+		result := <-finalResultChannel
+
+		Expect(result).ToNot(Equal(0))
+		Expect(result).To(Equal(18))
+	})
+
+	It("SliceOfChannelsRawMergerWithoutMapping works", func() {
+		individualResultsChannels := []<-chan int{}
+		for i := 0; i < 3; i++ {
+			c := make(chan int)
+			go func(i int, c chan int) {
+				for ii := 1; ii < 4; ii++ {
+					c <- (i * ii)
+				}
+				close(c)
+			}(i, c)
+			individualResultsChannels = append(individualResultsChannels, c)
+		}
+		Expect(len(individualResultsChannels)).To(Equal(3))
+		outputChannel := make(chan int)
+		wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true)
+		Expect(wg).ToNot(BeNil())
+		outputSlice := []int{}
+		for v := range outputChannel {
+			outputSlice = append(outputSlice, v)
+		}
+		Expect(len(outputSlice)).To(Equal(9))
+		slices.Sort(outputSlice)
+		Expect(outputSlice[0]).To(BeZero())
+		Expect(outputSlice[3]).To(Equal(1))
+		Expect(outputSlice[8]).To(Equal(6))
+	})
+
+	It("SliceOfChannelsTransformer works", func() {
+		individualResultsChannels := []<-chan int{}
+		for i := 0; i < 3; i++ {
+			c := make(chan int)
+			go func(i int, c chan int) {
+				for ii := 1; ii < 4; ii++ {
+					c <- (i * ii)
+				}
+				close(c)
+			}(i, c)
+			individualResultsChannels = append(individualResultsChannels, c)
+		}
+		Expect(len(individualResultsChannels)).To(Equal(3))
+		mappingFn := func(i int) string {
+			return fmt.Sprintf("$%d", i)
+		}
+
+		outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn)
+		Expect(len(outputChannels)).To(Equal(3))
+		rSlice := []string{}
+		for ii := 1; ii < 4; ii++ {
+			for i := 0; i < 3; i++ {
+				res := <-outputChannels[i]
+				rSlice = append(rSlice, res)
+			}
+		}
+		slices.Sort(rSlice)
+		Expect(rSlice[0]).To(Equal("$0"))
+		Expect(rSlice[3]).To(Equal("$1"))
+
Expect(rSlice[8]).To(Equal("$6")) + }) +}) diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go new file mode 100644 index 00000000..76081ba3 --- /dev/null +++ b/pkg/concurrency/types.go @@ -0,0 +1,6 @@ +package concurrency + +type ErrorOr[T any] struct { + Value T + Error error +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 8fb8c39d..49a6b1bd 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 0af5d94f..c0b4bc34 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error { return fmt.Errorf("unimplemented") } -func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) { - return schema.Result{}, fmt.Errorf("unimplemented") +func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) { + return schema.TranscriptionResult{}, fmt.Errorf("unimplemented") } func (llm *Base) TTS(*pb.TTSRequest) error { diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 882db12a..0e0e56c7 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp return client.TTS(ctx, in, opts...) } -func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques if err != nil { return nil, err } - tresult := &schema.Result{} + tresult := &schema.TranscriptionResult{} for _, s := range res.Segments { tks := []int{} for _, t := range s.Tokens { diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 73b185a3..b4ba4884 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc. 
return e.s.TTS(ctx, in) } -func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { r, err := e.s.AudioTranscription(ctx, in) if err != nil { return nil, err } - tr := &schema.Result{} + tr := &schema.TranscriptionResult{} for _, s := range r.Segments { var tks []int for _, t := range s.Tokens { diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index 4d06544d..aa7a3fbc 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -15,7 +15,7 @@ type LLM interface { Load(*pb.ModelOptions) error Embeddings(*pb.PredictOptions) ([]float32, error) GenerateImage(*pb.GenerateImageRequest) error - AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) + AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5d9808a4..617d8f62 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if _, err := os.Stat(uri); err == nil { serverAddress, err := getFreeAddress() if err != nil { - return "", fmt.Errorf("failed allocating free ports: %s", err.Error()) + return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error()) } // Make sure the process is executable if err := ml.startProcess(uri, o.model, serverAddress); err != nil { @@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if !ready { log.Debug().Msgf("GRPC Service NOT ready") - return "", fmt.Errorf("grpc service not ready") + return "", fmt.Errorf("%s grpc service not ready", backend) } options := *o.gRPCOptions @@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options) if err != nil { - return "", fmt.Errorf("could not load model: %w", err) + return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err) } if !res.Success { - return "", fmt.Errorf("could not load model (no success): %s", res.Message) + return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message) } return client, nil diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go deleted file mode 100644 index b09516a7..00000000 --- a/pkg/startup/model_preload.go +++ /dev/null @@ -1,85 +0,0 @@ -package startup - -import ( - "errors" - "os" - "path/filepath" - - "github.com/go-skynet/LocalAI/embedded" - "github.com/go-skynet/LocalAI/pkg/downloader" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" -) - -// PreloadModelsConfigurations will preload models from the given list of URLs -// It will download the model if it is not already present in the model path -// It will also try to resolve if the model is an embedded model YAML configuration -func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { - for _, url := range models { - - // As a best effort, try to resolve the model from the remote library - // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - lib, 
err := embedded.GetRemoteLibraryShorteners(modelLibraryURL)
-			if err == nil {
-				if lib[url] != "" {
-					log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
-					url = lib[url]
-				}
-			}
-		}
-
-		url = embedded.ModelShortURL(url)
-		switch {
-		case embedded.ExistsInModelsLibrary(url):
-			modelYAML, err := embedded.ResolveContent(url)
-			// If we resolve something, just save it to disk and continue
-			if err != nil {
-				log.Error().Err(err).Msg("error resolving model content")
-				continue
-			}
-
-			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
-			md5Name := utils.MD5(url)
-			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
-			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
-				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
-			}
-		case downloader.LooksLikeURL(url):
-			log.Debug().Msgf("[startup] resolved model to download: %s", url)
-
-			// md5 of model name
-			md5Name := utils.MD5(url)
-
-			// check if file exists
-			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
-				err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
-					utils.DisplayDownloadFunction(fileName, current, total, percent)
-				})
-				if err != nil {
-					log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
-				}
-			}
-		default:
-			if _, err := os.Stat(url); err == nil {
-				log.Debug().Msgf("[startup] resolved local model: %s", url)
-				// copy to modelPath
-				md5Name := utils.MD5(url)
-
-				modelYAML, err := os.ReadFile(url)
-				if err != nil {
-					log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
-					continue
-				}
-
-				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
-				if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
-					log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
-				}
-			} else {
-				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
-			}
-		}
-	}
-}
diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go
new file mode 100644
index 00000000..769d8a88
--- /dev/null
+++ b/pkg/utils/base64.go
@@ -0,0 +1,50 @@
+package utils
+
+import (
+	"encoding/base64"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+var base64DownloadClient http.Client = http.Client{
+	Timeout: 30 * time.Second,
+}
+
+// this function checks if the string is a URL; if it is, it downloads the image into memory,
+// encodes it in base64 and returns the base64 string
+
+// This may look weird down in pkg/utils while it is currently only used in core/config
+//
+// but I believe it may be useful for MQTT as well in the near future, so I'm
+// extracting it while I'm thinking of it.
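+//
+// A minimal usage sketch (the URL is illustrative only):
+//
+//	encoded, err := GetImageURLAsBase64("https://example.com/image.jpg")
+//	// on success, encoded holds the bare base64 payload, with no "data:" prefix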
+func GetImageURLAsBase64(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := base64DownloadClient.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not valid string") +} From f1f39eea3fd915e8ccc29ad8fa9d20c003ef8ed3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 09:47:33 +0200 Subject: [PATCH 0045/2648] Create localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/localaibot_automerge.yml diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml new file mode 100644 index 00000000..a540997b --- /dev/null +++ b/.github/workflows/localaibot_automerge.yml @@ -0,0 +1,42 @@ +name: Dependabot auto-merge +on: +- pull_request_target + +permissions: + contents: write + pull-requests: write + packages: read + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'localai-bot' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1.3.4 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + skip-commit-verification: true + + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Approve a PR if not already approved + run: | + gh pr checkout "$PR_URL" + if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ]; + then + gh pr review --approve "$PR_URL" + else + echo "PR already approved."; + fi + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + + - name: Enable auto-merge for LocalAIBot PRs + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From 95244ed6e7598db09fa8974052f550bb1dcc9d8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 10:03:15 +0200 Subject: [PATCH 0046/2648] Update localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index a540997b..98629ab7 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -1,4 +1,4 @@ -name: Dependabot auto-merge +name: LocalAI-bot auto-merge on: - pull_request_target From 4e74560649b0cb54fd1ab03d3a7a4105e2dd01fd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 13:30:40 +0200 Subject: [PATCH 0047/2648] ci: fix release pipeline missing dependencies (#2025) --- .github/workflows/release.yaml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 3c1cea44..33c640cc 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml 
@@ -1,6 +1,8 @@ name: Build and Release -on: push +on: +- push +- pull_request env: GRPC_VERSION: v1.58.0 @@ -40,7 +42,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg + sudo apt-get install build-essential ffmpeg protobuf-compiler - name: Install CUDA Dependencies if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }} run: | @@ -75,6 +77,9 @@ jobs: CMAKE_ARGS: "${{ matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + export PATH=$PATH:$GOPATH/bin if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then export BUILD_TYPE=cublas export PATH=/usr/local/cuda/bin:$PATH @@ -106,9 +111,12 @@ jobs: cache: false - name: Dependencies run: | - sudo apt-get install -y --no-install-recommends libopencv-dev + sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build stablediffusion run: | + export PATH=$PATH:$GOPATH/bin make backend-assets/grpc/stablediffusion mkdir -p release && cp backend-assets/grpc/stablediffusion release - uses: actions/upload-artifact@v4 @@ -139,6 +147,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build id: build env: @@ -147,6 +157,7 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include + export PATH=$PATH:$GOPATH/bin make dist - uses: actions/upload-artifact@v4 with: @@ -183,6 +194,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build id: build env: @@ -191,6 +204,7 @@ jobs: run: | export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include + export PATH=$PATH:$GOPATH/bin make dist - uses: actions/upload-artifact@v4 with: From b91820b7f88173e532af8de509d43dd6191a2386 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 13:46:07 +0200 Subject: [PATCH 0048/2648] Update localaibot_automerge.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/localaibot_automerge.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index 98629ab7..74c725f3 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -12,13 +12,6 @@ jobs: runs-on: ubuntu-latest if: ${{ github.actor == 'localai-bot' }} steps: - - name: Dependabot metadata - id: metadata - uses: dependabot/fetch-metadata@v1.3.4 - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" - skip-commit-verification: true - - name: Checkout repository uses: actions/checkout@v3 From 619f2517a490a1a3448cf5df837a8229b232287a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 13 Apr 2024 15:47:39 +0200 Subject: [PATCH 0049/2648] :arrow_up: Update ggerganov/llama.cpp (#2028) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f86ef23..1b59c604 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ab9a3240a9da941fdef5cd4a25f2b97c2f5a67aa +CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 0fdff269241d5ce93f325a48691bf9ebc5b5b9e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 18:59:21 +0200 Subject: [PATCH 0050/2648] feat(parler-tts): Add new backend (#2027) * feat(parler-tts): Add new backend Signed-off-by: Ettore Di Giacinto * feat(parler-tts): try downgrade protobuf Signed-off-by: Ettore Di Giacinto * feat(parler-tts): add parler conda env Signed-off-by: Ettore Di Giacinto * Revert "feat(parler-tts): try downgrade protobuf" This reverts commit bd5941d5cfc00676b45a99f71debf3c34249cf3c. Signed-off-by: Ettore Di Giacinto * deps: add grpc Signed-off-by: Ettore Di Giacinto * fix: try to gen proto with same environment * workaround * Revert "fix: try to gen proto with same environment" This reverts commit 998c745e2f475ec3ec43ac017bcebf3a7ce15b8b. * Workaround fixup --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Dave --- .github/workflows/test-extra.yml | 29 ++++ Dockerfile | 5 +- Makefile | 13 +- backend/python/parler-tts/Makefile | 39 ++++++ backend/python/parler-tts/install.sh | 39 ++++++ backend/python/parler-tts/parler-nvidia.yml | 48 +++++++ backend/python/parler-tts/parler.yml | 36 +++++ .../python/parler-tts/parler_tts_server.py | 125 ++++++++++++++++++ backend/python/parler-tts/run.sh | 16 +++ backend/python/parler-tts/test.sh | 11 ++ backend/python/parler-tts/test_parler.py | 81 ++++++++++++ backend/python/transformers-musicgen/run.sh | 2 +- 12 files changed, 440 insertions(+), 4 deletions(-) create mode 100644 backend/python/parler-tts/Makefile create mode 100755 backend/python/parler-tts/install.sh create mode 100644 backend/python/parler-tts/parler-nvidia.yml create mode 100644 backend/python/parler-tts/parler.yml create mode 100644 backend/python/parler-tts/parler_tts_server.py create mode 100644 backend/python/parler-tts/run.sh create mode 100644 backend/python/parler-tts/test.sh create mode 100644 backend/python/parler-tts/test_parler.py diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 7705783e..fa45cb3c 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -104,6 +104,35 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers test + tests-parler-tts: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential ffmpeg + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 
signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + sudo apt-get update && \ + sudo apt-get install -y conda + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools + + sudo rm -rfv /usr/bin/conda || true + + - name: Test parler-tts + run: | + export PATH=$PATH:/opt/conda/bin + make --jobs=5 --output-sync=target -C backend/python/parler-tts + make --jobs=5 --output-sync=target -C backend/python/parler-tts test tests-transformers-musicgen: runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index d0217d50..397fbe22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" @@ -275,6 +275,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/transformers-musicgen \ ; fi +RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ + make -C backend/python/parler-tts \ + ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/coqui \ ; fi diff --git a/Makefile b/Makefile index 1b59c604..d5bc3739 100644 --- a/Makefile +++ b/Makefile @@ -439,10 +439,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen .PHONY: protogen-python-clean 
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -524,6 +524,14 @@ transformers-protogen:
 transformers-protogen-clean:
 	$(MAKE) -C backend/python/transformers protogen-clean
 
+.PHONY: parler-tts-protogen
+parler-tts-protogen:
+	$(MAKE) -C backend/python/parler-tts protogen
+
+.PHONY: parler-tts-protogen-clean
+parler-tts-protogen-clean:
+	$(MAKE) -C backend/python/parler-tts protogen-clean
+
 .PHONY: transformers-musicgen-protogen
 transformers-musicgen-protogen:
 	$(MAKE) -C backend/python/transformers-musicgen protogen
@@ -560,6 +568,7 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/sentencetransformers
 	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/transformers-musicgen
+	$(MAKE) -C backend/python/parler-tts
 	$(MAKE) -C backend/python/vall-e-x
 	$(MAKE) -C backend/python/exllama
 	$(MAKE) -C backend/python/petals
diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile
new file mode 100644
index 00000000..4497762e
--- /dev/null
+++ b/backend/python/parler-tts/Makefile
@@ -0,0 +1,39 @@
+export CONDA_ENV_PATH = "parler.yml"
+SKIP_CONDA?=0
+ifeq ($(BUILD_TYPE), cublas)
+export CONDA_ENV_PATH = "parler-nvidia.yml"
+endif
+
+# Intel GPU are supposed to have dependencies installed in the main python
+# environment, so we skip conda installation for SYCL builds.
+# https://github.com/intel/intel-extension-for-pytorch/issues/538
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
+.PHONY: parler-tts
+parler-tts: protogen
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)
+
+.PHONY: run
+run: protogen
	@echo "Running parler-tts..."
	bash run.sh
	@echo "parler-tts run."
+
+.PHONY: test
+test: protogen
	@echo "Testing parler-tts..."
	bash test.sh
	@echo "parler-tts tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh
new file mode 100755
index 00000000..b9965b23
--- /dev/null
+++ b/backend/python/parler-tts/install.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -ex
+
+SKIP_CONDA=${SKIP_CONDA:-0}
+
+# Succeeds when the named conda environment does NOT exist yet
+conda_env_exists(){
+    ! conda list --name "${@}" >/dev/null 2>/dev/null
+}
+
+if [ $SKIP_CONDA -eq 1 ]; then
+    echo "Skipping conda environment installation"
+else
+    export PATH=$PATH:/opt/conda/bin
+    if conda_env_exists "parler" ; then
+        echo "Creating virtual environment..."
+        conda env create --name parler --file $1
+        echo "Virtual environment created."
+    else
+        echo "Virtual environment already exists."
+    fi
+fi
+
+if [ $SKIP_CONDA -ne 1 ]; then
+    # Activate conda environment
+    source activate parler
+    # https://github.com/descriptinc/audiotools/issues/101
+    # incompatible protobuf versions.
+    curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o $CONDA_PREFIX/lib/python3.11/site-packages/google/protobuf/internal/builder.py
+fi
+
+if [ "$PIP_CACHE_PURGE" = true ] ; then
+    if [ $SKIP_CONDA -ne 1 ]; then
+        # Activate conda environment
+        source activate parler
+    fi
+
+    pip cache purge
+fi
\ No newline at end of file
diff --git a/backend/python/parler-tts/parler-nvidia.yml b/backend/python/parler-tts/parler-nvidia.yml
new file mode 100644
index 00000000..ed925e94
--- /dev/null
+++ b/backend/python/parler-tts/parler-nvidia.yml
@@ -0,0 +1,48 @@
+name: parler
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.11=h7f8727e_2
+  - pip=23.2.1=py311h06a4308_0
+  - python=3.11.5=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py311h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2023c=h04d1e81_0
+  - wheel=0.41.2=py311h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - accelerate>=0.11.0
+    - grpcio==1.59.0
+    - numpy==1.26.0
+    - nvidia-cublas-cu12==12.1.3.1
+    - nvidia-cuda-cupti-cu12==12.1.105
+    - nvidia-cuda-nvrtc-cu12==12.1.105
+    - nvidia-cuda-runtime-cu12==12.1.105
+    - nvidia-cudnn-cu12==8.9.2.26
+    - nvidia-cufft-cu12==11.0.2.54
+    - nvidia-curand-cu12==10.3.2.106
+    - nvidia-cusolver-cu12==11.4.5.107
+    - nvidia-cusparse-cu12==12.1.0.106
+    - nvidia-nccl-cu12==2.18.1
+    - nvidia-nvjitlink-cu12==12.2.140
+    - nvidia-nvtx-cu12==12.1.105
+    - torch==2.1.0
+    - transformers>=4.34.0
+    - descript-audio-codec
+    - sentencepiece
+    - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
+prefix: /opt/conda/envs/parler
diff --git a/backend/python/parler-tts/parler.yml b/backend/python/parler-tts/parler.yml
new file mode 100644
index 00000000..fd0c3cb6
--- /dev/null
+++ b/backend/python/parler-tts/parler.yml
@@ -0,0 +1,36 @@
+name: parler
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.11=h7f8727e_2
+  - pip=23.2.1=py311h06a4308_0
+  - python=3.11.5=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py311h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2023c=h04d1e81_0
+  - wheel=0.41.2=py311h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - accelerate>=0.11.0
+    - numpy==1.26.0
+    - grpcio==1.59.0
+    - torch==2.1.0
+    - transformers>=4.34.0
+    - descript-audio-codec
+    - sentencepiece
+    - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
+prefix: /opt/conda/envs/parler
diff --git a/backend/python/parler-tts/parler_tts_server.py
b/backend/python/parler-tts/parler_tts_server.py
new file mode 100644
index 00000000..655990d7
--- /dev/null
+++ b/backend/python/parler-tts/parler_tts_server.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for ParlerTTSForConditionalGeneration models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from scipy.io.wavfile import write as write_wav
+
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf
+import torch
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a model into memory.
+
+        Args:
+            request: A LoadModelRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        try:
+            self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        model_name = request.model
+        voice = request.voice
+        if voice == "":
+            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+ if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + try: + device = "cuda:0" if torch.cuda.is_available() else "cpu" + input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device) + prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device) + + generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids) + audio_arr = generation.cpu().numpy().squeeze() + print("[parler-tts] TTS generated!", file=sys.stderr) + sf.write(request.dst, audio_arr, self.model.config.sampling_rate) + print("[parler-tts] TTS saved to", request.dst, file=sys.stderr) + print("[parler-tts] TTS for", file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + + +def serve(address): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr) + + # Define the signal handler function + def signal_handler(sig, frame): + print("[parler-tts] Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." 
+ ) + args = parser.parse_args() + print(f"[parler-tts] startup: {args}", file=sys.stderr) + serve(args.addr) diff --git a/backend/python/parler-tts/run.sh b/backend/python/parler-tts/run.sh new file mode 100644 index 00000000..08e42198 --- /dev/null +++ b/backend/python/parler-tts/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +## +## A bash script wrapper that runs the parler-tts server with conda + +echo "Launching gRPC server for parler-tts" + +export PATH=$PATH:/opt/conda/bin + +# Activate conda environment +source activate parler + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python $DIR/parler_tts_server.py $@ diff --git a/backend/python/parler-tts/test.sh b/backend/python/parler-tts/test.sh new file mode 100644 index 00000000..1bd15fd1 --- /dev/null +++ b/backend/python/parler-tts/test.sh @@ -0,0 +1,11 @@ +#!/bin/bash +## +## A bash script wrapper that runs the transformers server with conda + +# Activate conda environment +source activate parler + +# get the directory where the bash script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +python -m unittest $DIR/test_parler.py \ No newline at end of file diff --git a/backend/python/parler-tts/test_parler.py b/backend/python/parler-tts/test_parler.py new file mode 100644 index 00000000..ce9b66ac --- /dev/null +++ b/backend/python/parler-tts/test_parler.py @@ -0,0 +1,81 @@ +""" +A test script to test the gRPC service +""" +import unittest +import subprocess +import time +import backend_pb2 +import backend_pb2_grpc + +import grpc + + +class TestBackendServicer(unittest.TestCase): + """ + TestBackendServicer is the class that tests the gRPC service + """ + def setUp(self): + """ + This method sets up the gRPC service by starting the server + """ + self.service = subprocess.Popen(["python3", "parler_tts_server.py", "--addr", "localhost:50051"]) + time.sleep(10) + + def tearDown(self) -> None: + """ + This method tears down the gRPC service by terminating the server + """ + self.service.terminate() + self.service.wait() + + def test_server_startup(self): + """ + This method tests if the server starts up successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.Health(backend_pb2.HealthMessage()) + self.assertEqual(response.message, b'OK') + except Exception as err: + print(err) + self.fail("Server failed to start") + finally: + self.tearDown() + + def test_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) + self.assertTrue(response.success) + tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?") + tts_response = 
stub.TTS(tts_request) + self.assertIsNotNone(tts_response) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + self.tearDown() \ No newline at end of file diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh index d3dcb968..3d3ffcfd 100644 --- a/backend/python/transformers-musicgen/run.sh +++ b/backend/python/transformers-musicgen/run.sh @@ -8,7 +8,7 @@ echo "Launching gRPC server for transformers-musicgen" export PATH=$PATH:/opt/conda/bin # Activate conda environment -source activate transformers-musicgen +source activate transformers # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" From d5699dbf4f3041eb7f01c996c0a2b3b59319b1bc Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 13 Apr 2024 13:01:17 -0400 Subject: [PATCH 0051/2648] fix - correct checkout versions (#2029) minor fix - bump some checkout@v3 to checkout@v4 to match and clean up warnings Signed-off-by: Dave Lee --- .github/workflows/dependabot_auto.yml | 2 +- .github/workflows/localaibot_automerge.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 51337d20..be3a40a8 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -20,7 +20,7 @@ jobs: skip-commit-verification: true - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Approve a PR if not already approved run: | diff --git a/.github/workflows/localaibot_automerge.yml b/.github/workflows/localaibot_automerge.yml index 74c725f3..6a1ab891 100644 --- a/.github/workflows/localaibot_automerge.yml +++ b/.github/workflows/localaibot_automerge.yml @@ -13,7 +13,7 @@ jobs: if: ${{ github.actor == 'localai-bot' }} steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Approve a PR if not already approved run: | From 6b07ded11909bf8f52a8e6de402ac8bf91201831 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 19:12:54 +0200 Subject: [PATCH 0052/2648] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 76a5fc08..4c2f68b2 100644 --- a/README.md +++ b/README.md @@ -50,17 +50,12 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 - Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 - Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715 -- Upload file API: https://github.com/mudler/LocalAI/pull/1703 -- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653 -- Mamba support: https://github.com/mudler/LocalAI/pull/1589 -- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522 -- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489 -- Img2vid https://github.com/mudler/LocalAI/pull/1442 Hot topics (looking for contributors): - Backends v2: 
https://github.com/mudler/LocalAI/issues/1126 From 4486db912b62e31bffe662b977a31567e62ecbfc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 13 Apr 2024 22:57:46 +0200 Subject: [PATCH 0053/2648] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index ff1dc6a7..ab45e5aa 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -169,7 +169,7 @@ Call functions
```bash -curl https://localhost:8080/v1/chat/completions \ +curl http://localhost:8080/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-4", From b739cbb86b9734bd62d4f63fad6583cf97059ea5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 14 Apr 2024 10:57:07 +0200 Subject: [PATCH 0054/2648] Revert "build(deps): bump the pip group across 4 directories with 8 updates" (#2030) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "build(deps): bump the pip group across 4 directories with 8 updates (…" This reverts commit e0dee52a2ab811fccc18f309a6c5fefcb4725448. --- docs/data/version.json | 2 +- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..1b6a2161 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": "v2.12.3" } diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 759c5b03..7164e011 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.1.0 +langchain==0.0.234 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index cdf466b9..b9e649c5 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.1.0 +langchain==0.0.160 openai==0.27.6 chromadb==0.3.21 -llama-index==0.9.36 \ No newline at end of file +llama-index==0.6.2 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1e63b0bf..2de5bcf0 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.9.2 +aiohttp==3.8.4 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2023.7.22 +certifi==2022.12.7 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.7 -langchain==0.1.0 +idna==3.4 +langchain==0.0.159 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.31.0 +requests==2.29.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.18 +urllib3==1.26.15 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index 1fcd5093..ae527c76 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.30.0 +streamlit==1.26.0 requests \ No newline at end of file From 57bd365d876ae55f950821707485f183a6f6685a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 15 Apr 2024 01:31:43 +0200 Subject: [PATCH 0055/2648] :arrow_up: Update docs version mudler/LocalAI (#2032) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- 
docs/data/version.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/data/version.json b/docs/data/version.json
index 1b6a2161..6a618115 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-    "version": "v2.12.3"
+    "version": "v2.12.4"
 }

From de3a1a0a8e1dcf22aed34cad336962ec53ed89cc Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Mon, 15 Apr 2024 01:35:44 +0200
Subject: [PATCH 0056/2648] :arrow_up: Update ggerganov/llama.cpp (#2033)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d5bc3739..04745f39 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4bd0f93e4ab4fe6682e7d0241c1bdec1397e954a
+CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From e843d7df0e8b177ab122a9f7bfa7196274ccd204 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 15 Apr 2024 19:47:11 +0200
Subject: [PATCH 0057/2648] feat(grpc): return consumed token count and update response accordingly (#2035)

Fixes: #1920
---
 backend/backend.proto             | 2 ++
 backend/cpp/llama/grpc-server.cpp | 8 ++++++++
 core/backend/llm.go               | 6 ++++++
 core/services/openai.go           | 8 ++++----
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/backend/backend.proto b/backend/backend.proto
index 56d919ef..62e1a1a6 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -114,6 +114,8 @@ message PredictOptions {
 // The response message containing the result
 message Reply {
   bytes message = 1;
+  int32 tokens = 2;
+  int32 prompt_tokens = 3;
 }
 
 message ModelOptions {
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index a2e39a9c..6fb08658 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2332,6 +2332,10 @@ public:
         std::string completion_text = result.result_json.value("content", "");
 
         reply.set_message(completion_text);
+        int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+        reply.set_tokens(tokens_predicted);
+        int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+        reply.set_prompt_tokens(tokens_evaluated);
 
         // Send the reply
         writer->Write(reply);
@@ -2357,6 +2361,10 @@ public:
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
             completion_text = result.result_json.value("content", "");
+            int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+            int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+            reply->set_prompt_tokens(tokens_evaluated);
+            reply->set_tokens(tokens_predicted);
             reply->set_message(completion_text);
         }
         else
diff --git a/core/backend/llm.go b/core/backend/llm.go
index 1878e87a..75766d78 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
 	} else {
 		go func() {
 			reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+			if err == nil && tokenUsage.Prompt == 0 {
+				tokenUsage.Prompt = int(reply.PromptTokens)
+			}
+			if err == nil && tokenUsage.Completion == 0 {
+				tokenUsage.Completion = int(reply.Tokens)
+			}
 			if err != nil {
rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} close(rawResultChannel) diff --git a/core/services/openai.go b/core/services/openai.go index 0f61d6f4..3fa041f5 100644 --- a/core/services/openai.go +++ b/core/services/openai.go @@ -160,7 +160,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest bc, request, err := oais.getConfig(request) if err != nil { - log.Error().Msgf("[oais::GenerateTextFromRequest] error getting configuration: %q", err) + log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting configuration") return } @@ -259,7 +259,7 @@ func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest // If any of the setup goroutines experienced an error, quit early here. if setupError != nil { go func() { - log.Error().Msgf("[OAIS GenerateTextFromRequest] caught an error during setup: %q", setupError) + log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup") rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError} close(rawFinalResultChannel) }() @@ -603,7 +603,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche Usage: schema.OpenAIUsage{ PromptTokens: rawResult.Value.Usage.Prompt, CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt, + TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, }, } @@ -644,7 +644,7 @@ func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *sche Usage: schema.OpenAIUsage{ PromptTokens: rawResult.Value.Usage.Prompt, CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Prompt, + TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, }, } From c751a4ac06bab3736d464d16cadf02a04f822bb5 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:47:51 -0500 Subject: [PATCH 0058/2648] fix: remove build path from help text documentation (#2037) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- docs/content/docs/advanced/advanced-usage.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index dace5803..4bd16030 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -384,6 +384,8 @@ docker run --env-file .env localai You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. Any command line parameter can be specified via an environment variable. 
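+For example, the `--models-path` flag can equivalently be set through the `$LOCALAI_MODELS_PATH` environment variable, as the tables below show.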
+In the help text below, BASEPATH is the location that local-ai is being executed from + #### Global Flags | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| @@ -393,13 +395,13 @@ You can control LocalAI with command line arguments, to specify a binding addres #### Storage Flags | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| -| --models-path | /home/cryptk/Documents/sourcecode/LocalAI/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | +| --models-path | BASEPATH/models | Path containing models used for inferencing | $LOCALAI_MODELS_PATH | | --backend-assets-path |/tmp/localai/backend_data | Path used to extract libraries that are required by some of the backends in runtime | $LOCALAI_BACKEND_ASSETS_PATH | | --image-path | /tmp/generated/images | Location for images generated by backends (e.g. stablediffusion) | $LOCALAI_IMAGE_PATH | | --audio-path | /tmp/generated/audio | Location for audio generated by backends (e.g. piper) | $LOCALAI_AUDIO_PATH | | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | -| --localai-config-dir | /home/cryptk/Documents/sourcecode/LocalAI/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | | --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | #### Models Flags From 538a086309b91f4594d5513c0fd88e981877a83d Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:13:59 -0500 Subject: [PATCH 0059/2648] fix: previous CLI rework broke debug logging (#2036) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> Co-authored-by: Dave --- core/cli/run.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/cli/run.go b/core/cli/run.go index c3b186c0..cafc0b54 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -60,7 +60,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithYAMLConfigPreload(r.PreloadModelsConfig), config.WithModelPath(r.ModelsPath), config.WithContextSize(r.ContextSize), - config.WithDebug(ctx.Debug), + config.WithDebug(*ctx.LogLevel == "debug"), config.WithImageDir(r.ImagePath), config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), From b72c6cc9fc6c16db301c2b0d992ba03c348f43b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 20:52:39 +0000 Subject: [PATCH 0060/2648] build(deps): bump softprops/action-gh-release from 1 to 2 (#2039) Bumps [softprops/action-gh-release](https://github.com/softprops/action-gh-release) from 1 to 2. - [Release notes](https://github.com/softprops/action-gh-release/releases) - [Changelog](https://github.com/softprops/action-gh-release/blob/master/CHANGELOG.md) - [Commits](https://github.com/softprops/action-gh-release/compare/v1...v2) --- updated-dependencies: - dependency-name: softprops/action-gh-release dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 33c640cc..dc887fc1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -92,7 +92,7 @@ jobs: name: LocalAI-linux-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | @@ -164,7 +164,7 @@ jobs: name: LocalAI-MacOS-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | @@ -211,7 +211,7 @@ jobs: name: LocalAI-MacOS-arm64-${{ matrix.build }} path: release/ - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: | From 46609e936e5e644671855b004e89317300d3cfb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 21:37:06 +0000 Subject: [PATCH 0061/2648] build(deps): bump dependabot/fetch-metadata from 1.3.4 to 2.0.0 (#2040) Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 1.3.4 to 2.0.0. - [Release notes](https://github.com/dependabot/fetch-metadata/releases) - [Commits](https://github.com/dependabot/fetch-metadata/compare/v1.3.4...v2.0.0) --- updated-dependencies: - dependency-name: dependabot/fetch-metadata dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index be3a40a8..8e32aee1 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v1.3.4 + uses: dependabot/fetch-metadata@v2.0.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true From 320d8a48d9bd09b5fda1c4330d8d693ccc705fcc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:02:44 +0000 Subject: [PATCH 0062/2648] build(deps): bump github/codeql-action from 2 to 3 (#2041) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v2...v3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/secscan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 884b84d5..d9743d9e 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -24,7 +24,7 @@ jobs: args: '-no-fail -fmt sarif -out results.sarif ./...' - name: Upload SARIF file if: ${{ github.actor != 'dependabot[bot]' }} - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: # Path to SARIF file relative to the root of the repository sarif_file: results.sarif From cdece3879f4658eaccb3394d9e29b9534c8b773b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 00:47:29 +0200 Subject: [PATCH 0063/2648] :arrow_up: Update ggerganov/llama.cpp (#2043) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 04745f39..37130567 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1958f7e06ca2d2e3ab5698cc67513ba359144d8e +CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 0cc1ad21889d9dca21f71dfe7f47a87a0ddf0012 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 01:27:52 +0200 Subject: [PATCH 0064/2648] :arrow_up: Update ggerganov/whisper.cpp (#2042) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 37130567..7cde8fa7 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=8f253ef3af1c62c04316ba4afa7145fc4d701a8c +WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From fdec8a9d00a034ccd8e075008edd165147edf328 Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 15 Apr 2024 21:46:36 -0400 Subject: [PATCH 0065/2648] fix: action-tmate back to upstream, dead code removal (#2038) cleanup: upstream action-tmate has taken my PR, drop master reference. 
Also remove dead code from api.go Signed-off-by: Dave Lee --- .github/workflows/test.yml | 6 +++--- core/http/api.go | 18 ------------------ 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 29bd3e08..156294b5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,7 +121,7 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 @@ -174,7 +174,7 @@ jobs: make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 @@ -209,6 +209,6 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: dave-gray101/action-tmate@master + uses: mxschmitt/action-tmate@v3.18 with: connect-timeout-seconds: 180 \ No newline at end of file diff --git a/core/http/api.go b/core/http/api.go index 5c9095ea..7094899a 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -108,24 +108,6 @@ func App(application *core.Application) (*fiber.App, error) { return c.Next() } - // // Check for api_keys.json file - // fileContent, err := os.ReadFile("api_keys.json") - // if err == nil { - // // Parse JSON content from the file - // var fileKeys []string - // err := json.Unmarshal(fileContent, &fileKeys) - // if err != nil { - // return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - // } - - // // Add file keys to options.ApiKeys - // application.ApplicationConfig.ApiKeys = append(application.ApplicationConfig.ApiKeys, fileKeys...) - // } - - // if len(application.ApplicationConfig.ApiKeys) == 0 { - // return c.Next() - // } - authHeader := readAuthHeader(c) if authHeader == "" { return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"}) From df4a13a08bf91491652e7baa65bd1eafa2c0ff44 Mon Sep 17 00:00:00 2001 From: Adrien Brault Date: Tue, 16 Apr 2024 11:10:23 +0200 Subject: [PATCH 0066/2648] docs: fix stores link (#2044) Signed-off-by: Adrien Brault --- docs/content/docs/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 6aede1d6..5224bc49 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -100,7 +100,7 @@ Note that this started just as a fun weekend project by [mudler](https://github. 
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🆕 [Vision API](https://localai.io/features/gpt-vision/) -- 💾 [Stores](https://localai.io/features/stores) +- 💾 [Stores](https://localai.io/stores) ## Contribute and help From 33c78d2228891caacb9d8bc7dc2c567caaf12a53 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Apr 2024 15:54:14 +0200 Subject: [PATCH 0067/2648] feat(store): add Golang client (#1977) This adds a basic store client for Go Signed-off-by: Ettore Di Giacinto --- core/clients/store.go | 151 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 core/clients/store.go diff --git a/core/clients/store.go b/core/clients/store.go new file mode 100644 index 00000000..f737ee42 --- /dev/null +++ b/core/clients/store.go @@ -0,0 +1,151 @@ +package clients + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" +) + +// Define a struct to hold the store API client +type StoreClient struct { + BaseURL string + Client *http.Client +} + +type SetRequest struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` +} + +type GetRequest struct { + Keys [][]float32 `json:"keys"` +} + +type GetResponse struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` +} + +type DeleteRequest struct { + Keys [][]float32 `json:"keys"` +} + +type FindRequest struct { + TopK int `json:"topk"` + Key []float32 `json:"key"` +} + +type FindResponse struct { + Keys [][]float32 `json:"keys"` + Values []string `json:"values"` + Similarities []float32 `json:"similarities"` +} + +// Constructor for StoreClient +func NewStoreClient(baseUrl string) *StoreClient { + return &StoreClient{ + BaseURL: baseUrl, + Client: &http.Client{}, + } +} + +// Implement Set method +func (c *StoreClient) Set(req SetRequest) error { + return c.doRequest("stores/set", req) +} + +// Implement Get method +func (c *StoreClient) Get(req GetRequest) (*GetResponse, error) { + body, err := c.doRequestWithResponse("stores/get", req) + if err != nil { + return nil, err + } + + var resp GetResponse + err = json.Unmarshal(body, &resp) + if err != nil { + return nil, err + } + + return &resp, nil +} + +// Implement Delete method +func (c *StoreClient) Delete(req DeleteRequest) error { + return c.doRequest("stores/delete", req) +} + +// Implement Find method +func (c *StoreClient) Find(req FindRequest) (*FindResponse, error) { + body, err := c.doRequestWithResponse("stores/find", req) + if err != nil { + return nil, err + } + + var resp FindResponse + err = json.Unmarshal(body, &resp) + if err != nil { + return nil, err + } + + return &resp, nil +} + +// Helper function to perform a request without expecting a response body +func (c *StoreClient) doRequest(path string, data interface{}) error { + jsonData, err := json.Marshal(data) + if err != nil { + return err + } + + req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.Client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode) + } + + return nil +} + +// Helper function to perform a request and parse the response body +func (c *StoreClient) doRequestWithResponse(path string, data interface{}) ([]byte, error) { 
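+	// Unlike doRequest above, this variant hands back the raw response body so the caller can decode it.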
+ jsonData, err := json.Marshal(data) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", c.BaseURL+"/"+path, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.Client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request to %s failed with status code %d", path, resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return body, nil +} From bcaa320f3611deb3c897b51c0240a186b51f21ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 19:49:54 +0000 Subject: [PATCH 0068/2648] build(deps): bump the pip group across 4 directories with 8 updates (#2049) Bumps the pip group with 1 update in the /examples/functions directory: [langchain](https://github.com/langchain-ai/langchain). Bumps the pip group with 2 updates in the /examples/langchain-chroma directory: [langchain](https://github.com/langchain-ai/langchain) and [llama-index](https://github.com/run-llama/llama_index). Bumps the pip group with 6 updates in the /examples/langchain/langchainpy-localai-example directory: | Package | From | To | | --- | --- | --- | | [langchain](https://github.com/langchain-ai/langchain) | `0.0.159` | `0.1.0` | | [aiohttp](https://github.com/aio-libs/aiohttp) | `3.8.4` | `3.9.2` | | [certifi](https://github.com/certifi/python-certifi) | `2022.12.7` | `2023.7.22` | | [idna](https://github.com/kjd/idna) | `3.4` | `3.7` | | [requests](https://github.com/psf/requests) | `2.29.0` | `2.31.0` | | [urllib3](https://github.com/urllib3/urllib3) | `1.26.15` | `1.26.18` | Bumps the pip group with 1 update in the /examples/streamlit-bot directory: [streamlit](https://github.com/streamlit/streamlit). 
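For reference, a minimal usage sketch of the Go store client introduced in #1977 above. It assumes a LocalAI instance listening on `http://localhost:8080` and uses short placeholder vectors in place of real embeddings:

```go
package main

import (
	"fmt"
	"log"

	"github.com/go-skynet/LocalAI/core/clients"
)

func main() {
	// Assumed address; point this at wherever your LocalAI instance listens.
	store := clients.NewStoreClient("http://localhost:8080")

	// Store two toy embedding vectors alongside their source strings.
	if err := store.Set(clients.SetRequest{
		Keys:   [][]float32{{0.1, 0.2, 0.3}, {0.9, 0.8, 0.7}},
		Values: []string{"first document", "second document"},
	}); err != nil {
		log.Fatal(err)
	}

	// Retrieve the single closest entry to a query vector.
	res, err := store.Find(clients.FindRequest{TopK: 1, Key: []float32{0.1, 0.2, 0.3}})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Values, res.Similarities)
}
```

Each call maps onto the corresponding `stores/*` HTTP path built from the base URL, and any non-200 status is surfaced as a Go error, as the helper functions above show.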
Updates `langchain` from 0.0.234 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `langchain` from 0.0.160 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `llama-index` from 0.6.2 to 0.9.36 - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.6.2...v0.9.36) Updates `langchain` from 0.0.159 to 0.1.0 - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/v0.0.234...v0.1.0) Updates `aiohttp` from 3.8.4 to 3.9.2 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.4...v3.9.2) Updates `certifi` from 2022.12.7 to 2023.7.22 - [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22) Updates `idna` from 3.4 to 3.7 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) Updates `requests` from 2.29.0 to 2.31.0 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.29.0...v2.31.0) Updates `urllib3` from 1.26.15 to 1.26.18 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) Updates `streamlit` from 1.26.0 to 1.30.0 - [Release notes](https://github.com/streamlit/streamlit/releases) - [Commits](https://github.com/streamlit/streamlit/compare/1.26.0...1.30.0) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: llama-index dependency-type: direct:production dependency-group: pip - dependency-name: langchain dependency-type: direct:production dependency-group: pip - dependency-name: aiohttp dependency-type: direct:production dependency-group: pip - dependency-name: certifi dependency-type: direct:production dependency-group: pip - dependency-name: idna dependency-type: direct:production dependency-group: pip - dependency-name: requests dependency-type: direct:production dependency-group: pip - dependency-name: urllib3 dependency-type: direct:production dependency-group: pip - dependency-name: streamlit dependency-type: direct:production dependency-group: pip ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/functions/requirements.txt | 2 +- examples/langchain-chroma/requirements.txt | 4 ++-- .../langchainpy-localai-example/requirements.txt | 12 ++++++------ examples/streamlit-bot/requirements.txt | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 7164e011..759c5b03 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.0.234 +langchain==0.1.0 openai==0.27.8 diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index b9e649c5..cdf466b9 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.160 +langchain==0.1.0 openai==0.27.6 chromadb==0.3.21 -llama-index==0.6.2 \ No newline at end of file +llama-index==0.9.36 \ No newline at end of file diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 2de5bcf0..1e63b0bf 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,16 +1,16 @@ -aiohttp==3.8.4 +aiohttp==3.9.2 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 dataclasses-json==0.5.7 debugpy==1.6.7 frozenlist==1.3.3 greenlet==2.0.2 -idna==3.4 -langchain==0.0.159 +idna==3.7 +langchain==0.1.0 marshmallow==3.19.0 marshmallow-enum==1.5.1 multidict==6.0.4 @@ -22,11 +22,11 @@ openapi-schema-pydantic==1.2.4 packaging==23.1 pydantic==1.10.7 PyYAML==6.0 -requests==2.29.0 +requests==2.31.0 SQLAlchemy==2.0.12 tenacity==8.2.2 tqdm==4.65.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -urllib3==1.26.15 +urllib3==1.26.18 yarl==1.9.2 diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index ae527c76..1fcd5093 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.26.0 +streamlit==1.30.0 requests \ No newline at end of file From 6b06d4e0af4db7a8aa8e131ec2b3af171934862e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Apr 2024 23:20:11 +0200 Subject: [PATCH 0069/2648] fix(fncall): fix regression introduced in #1963 (#2048) Signed-off-by: Dave --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Dave Co-authored-by: Dave --- core/services/openai.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/services/openai.go b/core/services/openai.go index 3fa041f5..7a2679ad 100644 --- a/core/services/openai.go +++ b/core/services/openai.go @@ -778,13 +778,16 @@ func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
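	// ss will hold the grammar-constrained JSON object, which is expected to expose "function" and "arguments" fields.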
ss := map[string]interface{}{} // This prevent newlines to break JSON parsing for clients - // s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(llmresult), &ss) + s := utils.EscapeNewLines(llmresult) + if err := json.Unmarshal([]byte(s), &ss); err != nil { + log.Error().Msgf("error unmarshalling JSON: %s", err.Error()) + return results + } // The grammar defines the function name as "function", while OpenAI returns "name" func_name, ok := ss["function"] if !ok { - log.Debug().Msg("ss[function] is not OK!") + log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult) return results } // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object From 5763dc161376c86e4611ee9b7be54073a4fccf5b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 16 Apr 2024 23:37:50 +0200 Subject: [PATCH 0070/2648] :arrow_up: Update ggerganov/whisper.cpp (#2050) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7cde8fa7..f5b4dc2a 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=9fab28135c7867bb7eccd9ebcd2ea8d52e42ca81 +WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From af8c705ecd1ec47ca1254d7e7b8ab7ca7da89b57 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 17 Apr 2024 23:17:25 +0200 Subject: [PATCH 0071/2648] :arrow_up: Update ggerganov/whisper.cpp (#2060) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5b4dc2a..fdc7aade 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=a750868428868abd437e228ae5cab763ef3dc387 +WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From af9e5a2d05d477eedaf1bff08370208d2b4a9d86 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 17 Apr 2024 23:33:49 +0200 Subject: [PATCH 0072/2648] Revert #1963 (#2056) * Revert "fix(fncall): fix regression introduced in #1963 (#2048)" This reverts commit 6b06d4e0af4db7a8aa8e131ec2b3af171934862e. * Revert "fix: action-tmate back to upstream, dead code removal (#2038)" This reverts commit fdec8a9d00a034ccd8e075008edd165147edf328. * Revert "feat(grpc): return consumed token count and update response accordingly (#2035)" This reverts commit e843d7df0e8b177ab122a9f7bfa7196274ccd204. * Revert "refactor: backend/service split, channel-based llm flow (#1963)" This reverts commit eed5706994a3e770a0194cad9d1cfd724ba1b10a. 
* feat(grpc): return consumed token count and update response accordingly Fixes: #1920 Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/test.yml | 15 +- Makefile | 18 +- backend/go/transcribe/transcript.go | 6 +- backend/go/transcribe/whisper.go | 2 +- core/backend/embeddings.go | 90 +- core/backend/image.go | 259 +----- core/backend/llm.go | 271 ++---- core/backend/options.go | 84 +- core/backend/transcript.go | 41 +- core/backend/tts.go | 77 +- core/cli/run.go | 8 +- core/cli/transcript.go | 19 +- core/cli/tts.go | 26 +- core/config/backend_config.go | 301 ++++++- core/config/backend_config_loader.go | 509 ----------- core/config/exports_test.go | 6 - core/http/api.go | 227 ++--- core/http/api_test.go | 98 +-- core/http/ctx/fiber.go | 65 +- core/http/endpoints/elevenlabs/tts.go | 39 +- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/localai/tts.go | 39 +- core/http/endpoints/openai/assistant.go | 2 +- core/http/endpoints/openai/chat.go | 621 ++++++++++++-- core/http/endpoints/openai/completion.go | 163 +++- core/http/endpoints/openai/edit.go | 78 +- core/http/endpoints/openai/embeddings.go | 65 +- core/http/endpoints/openai/image.go | 216 ++++- core/http/endpoints/openai/inference.go | 55 ++ core/http/endpoints/openai/list.go | 52 +- core/http/endpoints/openai/request.go | 285 ++++++ core/http/endpoints/openai/transcription.go | 28 +- core/schema/{transcription.go => whisper.go} | 2 +- core/services/backend_monitor.go | 30 +- core/services/gallery.go | 116 +-- core/services/list_models.go | 72 -- core/services/openai.go | 808 ------------------ core/startup/startup.go | 91 +- core/state.go | 41 - .../llm text/-completions Stream.bru | 25 - pkg/concurrency/concurrency.go | 135 --- pkg/concurrency/concurrency_test.go | 101 --- pkg/concurrency/types.go | 6 - pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/model/initializers.go | 8 +- pkg/startup/model_preload.go | 85 ++ .../startup}/model_preload_test.go | 5 +- pkg/utils/base64.go | 50 -- 52 files changed, 2295 insertions(+), 3065 deletions(-) delete mode 100644 core/config/backend_config_loader.go delete mode 100644 core/config/exports_test.go create mode 100644 core/http/endpoints/openai/inference.go create mode 100644 core/http/endpoints/openai/request.go rename core/schema/{transcription.go => whisper.go} (90%) delete mode 100644 core/services/list_models.go delete mode 100644 core/services/openai.go delete mode 100644 core/state.go delete mode 100644 examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru delete mode 100644 pkg/concurrency/concurrency.go delete mode 100644 pkg/concurrency/concurrency_test.go delete mode 100644 pkg/concurrency/types.go create mode 100644 pkg/startup/model_preload.go rename {core/services => pkg/startup}/model_preload_test.go (96%) delete mode 100644 pkg/utils/base64.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 156294b5..46c4e065 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,9 +121,8 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-aio-container: runs-on: ubuntu-latest @@ -174,9 +173,8 @@ jobs: make 
run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 tests-apple: runs-on: macOS-14 @@ -209,6 +207,5 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 - with: - connect-timeout-seconds: 180 \ No newline at end of file + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 5 \ No newline at end of file diff --git a/Makefile b/Makefile index fdc7aade..6715e91e 100644 --- a/Makefile +++ b/Makefile @@ -301,9 +301,6 @@ clean-tests: rm -rf test-dir rm -rf core/http/backend-assets -halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually - ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {} - ## Build: build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) @@ -368,13 +365,13 @@ run-e2e-image: run-e2e-aio: @echo 'Running e2e AIO tests' - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio test-e2e: @echo 'Running e2e tests' BUILD_TYPE=$(BUILD_TYPE) \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e teardown-e2e: rm -rf $(TEST_DIR) || true @@ -382,15 +379,15 @@ teardown-e2e: test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS) test-tts: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ @@ -648,10 +645,7 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml 
LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ -# EXPERIMENTAL: -ifeq ($(BUILD_TYPE),metal) - cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/ -endif + backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index b38d5b9f..fdfaa974 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -21,7 +21,7 @@ func runCommand(command []string) (string, error) { // AudioToWav converts audio to wav for transcribe. // TODO: use https://github.com/mccoyst/ogg? func audioToWav(src, dst string) error { - command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} + command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst} out, err := runCommand(command) if err != nil { return fmt.Errorf("error: %w out: %s", err, out) @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { - res := schema.TranscriptionResult{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { + res := schema.Result{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index a9a62d24..ac93be01 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 2c63dedc..03ff90b9 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -2,100 +2,14 @@ package backend import ( "fmt" - "time" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" ) -type EmbeddingsBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig -} - -func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService { - return &EmbeddingsBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, - } -} - -func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { - - resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - go func(request *schema.OpenAIRequest) { - if request.Model == "" 
{ - request.Model = model.StableDiffusionBackend - } - - bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - items := []schema.Item{} - - for i, s := range bc.InputToken { - // get the model function to call for the result - embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - embeddings, err := embedFn() - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - for i, s := range bc.InputStrings { - // get the model function to call for the result - embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - embeddings, err := embedFn() - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Data: items, - Object: "list", - } - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} - close(resultChannel) - }(request) - return resultChannel -} - -func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { +func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { modelFile := backendConfig.Model grpcOpts := gRPCModelOpts(backendConfig) diff --git a/core/backend/image.go b/core/backend/image.go index affb3bb3..b0cffb0b 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -1,252 +1,18 @@ package backend import ( - "bufio" - "encoding/base64" - "fmt" - "io" - "net/http" - "os" - "path/filepath" - "strconv" - "strings" - "time" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/google/uuid" - "github.com/rs/zerolog/log" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" ) -type ImageGenerationBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig - BaseUrlForGeneratedImages string -} - -func NewImageGenerationBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ImageGenerationBackendService { - return &ImageGenerationBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, - } -} - -func (igbs *ImageGenerationBackendService) GenerateImage(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] { - resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - 
go func(request *schema.OpenAIRequest) { - bc, request, err := igbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, igbs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - src := "" - if request.File != "" { - - var fileData []byte - // check if input.File is an URL, if so download it and save it - // to a temporary file - if strings.HasPrefix(request.File, "http://") || strings.HasPrefix(request.File, "https://") { - out, err := downloadFile(request.File) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed downloading file:%w", err)} - close(resultChannel) - return - } - defer os.RemoveAll(out) - - fileData, err = os.ReadFile(out) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("failed reading file:%w", err)} - close(resultChannel) - return - } - - } else { - // base 64 decode the file and write it somewhere - // that we will cleanup - fileData, err = base64.StdEncoding.DecodeString(request.File) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - } - - // Create a temporary file - outputFile, err := os.CreateTemp(igbs.appConfig.ImageDir, "b64") - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - // write the base64 result - writer := bufio.NewWriter(outputFile) - _, err = writer.Write(fileData) - if err != nil { - outputFile.Close() - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - outputFile.Close() - src = outputFile.Name() - defer os.RemoveAll(src) - } - - log.Debug().Msgf("Parameter Config: %+v", bc) - - switch bc.Backend { - case "stablediffusion": - bc.Backend = model.StableDiffusionBackend - case "tinydream": - bc.Backend = model.TinyDreamBackend - case "": - bc.Backend = model.StableDiffusionBackend - if bc.Model == "" { - bc.Model = "stablediffusion_assets" // TODO: check? 
- } - } - - sizeParts := strings.Split(request.Size, "x") - if len(sizeParts) != 2 { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - width, err := strconv.Atoi(sizeParts[0]) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - height, err := strconv.Atoi(sizeParts[1]) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: fmt.Errorf("invalid value for 'size'")} - close(resultChannel) - return - } - - b64JSON := false - if request.ResponseFormat.Type == "b64_json" { - b64JSON = true - } - // src and clip_skip - var result []schema.Item - for _, i := range bc.PromptStrings { - n := request.N - if request.N == 0 { - n = 1 - } - for j := 0; j < n; j++ { - prompts := strings.Split(i, "|") - positive_prompt := prompts[0] - negative_prompt := "" - if len(prompts) > 1 { - negative_prompt = prompts[1] - } - - mode := 0 - step := bc.Step - if step == 0 { - step = 15 - } - - if request.Mode != 0 { - mode = request.Mode - } - - if request.Step != 0 { - step = request.Step - } - - tempDir := "" - if !b64JSON { - tempDir = igbs.appConfig.ImageDir - } - // Create a temporary file - outputFile, err := os.CreateTemp(tempDir, "b64") - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - outputFile.Close() - output := outputFile.Name() + ".png" - // Rename the temporary file - err = os.Rename(outputFile.Name(), output) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - if request.Seed == nil { - zVal := 0 // Idiomatic way to do this? Actually needed? 
- request.Seed = &zVal - } - - fn, err := imageGeneration(height, width, mode, step, *request.Seed, positive_prompt, negative_prompt, src, output, igbs.ml, bc, igbs.appConfig) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - if err := fn(); err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - - item := &schema.Item{} - - if b64JSON { - defer os.RemoveAll(output) - data, err := os.ReadFile(output) - if err != nil { - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err} - close(resultChannel) - return - } - item.B64JSON = base64.StdEncoding.EncodeToString(data) - } else { - base := filepath.Base(output) - item.URL = igbs.BaseUrlForGeneratedImages + base - } - - result = append(result, *item) - } - } - - id := uuid.New().String() - created := int(time.Now().Unix()) - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Data: result, - } - resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp} - close(resultChannel) - }(request) - return resultChannel -} - -func imageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { - +func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { threads := backendConfig.Threads if *threads == 0 && appConfig.Threads != 0 { threads = &appConfig.Threads } - gRPCOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), @@ -284,24 +50,3 @@ func imageGeneration(height, width, mode, step, seed int, positive_prompt, negat return fn, nil } - -// TODO: Replace this function with pkg/downloader - no reason to have a (crappier) bespoke download file fn here, but get things working before that change. -func downloadFile(url string) (string, error) { - // Get the data - resp, err := http.Get(url) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // Create the file - out, err := os.CreateTemp("", "image") - if err != nil { - return "", err - } - defer out.Close() - - // Write the body to file - _, err = io.Copy(out, resp.Body) - return out.Name(), err -} diff --git a/core/backend/llm.go b/core/backend/llm.go index 75766d78..a4d1e5f3 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -11,22 +11,17 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/rs/zerolog/log" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -type LLMRequest struct { - Id int // TODO Remove if not used. - Text string - Images []string - RawMessages []schema.Message - // TODO: Other Modalities? +type LLMResponse struct { + Response string // should this be []byte? 
+ Usage TokenUsage } type TokenUsage struct { @@ -34,94 +29,57 @@ type TokenUsage struct { Completion int } -type LLMResponse struct { - Request *LLMRequest - Response string // should this be []byte? - Usage TokenUsage -} - -// TODO: Does this belong here or in core/services/openai.go? -type LLMResponseBundle struct { - Request *schema.OpenAIRequest - Response []schema.Choice - Usage TokenUsage -} - -type LLMBackendService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig - ftMutex sync.Mutex - cutstrings map[string]*regexp.Regexp -} - -func NewLLMBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *LLMBackendService { - return &LLMBackendService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - ftMutex: sync.Mutex{}, - cutstrings: make(map[string]*regexp.Regexp), +func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { + modelFile := c.Model + threads := c.Threads + if *threads == 0 && o.Threads != 0 { + threads = &o.Threads } -} - -// TODO: Should ctx param be removed and replaced with hardcoded req.Context? -func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, bc *config.BackendConfig, enableTokenChannel bool) ( - resultChannel <-chan concurrency.ErrorOr[*LLMResponse], tokenChannel <-chan concurrency.ErrorOr[*LLMResponse], err error) { - - threads := bc.Threads - if (threads == nil || *threads == 0) && llmbs.appConfig.Threads != 0 { - threads = &llmbs.appConfig.Threads - } - - grpcOpts := gRPCModelOpts(bc) + grpcOpts := gRPCModelOpts(c) var inferenceModel grpc.Backend + var err error - opts := modelOpts(bc, llmbs.appConfig, []model.Option{ + opts := modelOpts(c, o, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup - model.WithAssetDir(llmbs.appConfig.AssetsDestination), - model.WithModel(bc.Model), - model.WithContext(llmbs.appConfig.Context), + model.WithAssetDir(o.AssetsDestination), + model.WithModel(modelFile), + model.WithContext(o.Context), }) - if bc.Backend != "" { - opts = append(opts, model.WithBackendString(bc.Backend)) + if c.Backend != "" { + opts = append(opts, model.WithBackendString(c.Backend)) } - // Check if bc.Model exists, if it doesn't try to load it from the gallery - if llmbs.appConfig.AutoloadGalleries { // experimental - if _, err := os.Stat(bc.Model); os.IsNotExist(err) { + // Check if the modelFile exists, if it doesn't try to load it from the gallery + if o.AutoloadGalleries { // experimental + if _, err := os.Stat(modelFile); os.IsNotExist(err) { utils.ResetDownloadTimers() // if we failed to load the model, we try to download it - err := gallery.InstallModelFromGalleryByName(llmbs.appConfig.Galleries, bc.Model, llmbs.appConfig.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) + err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) if err != nil { - return nil, nil, err + return nil, err } } } - if bc.Backend == "" { - log.Debug().Msgf("backend not known for %q, falling back to greedy loader to find it", bc.Model) - inferenceModel, err = llmbs.ml.GreedyLoader(opts...) 
+ if c.Backend == "" { + inferenceModel, err = loader.GreedyLoader(opts...) } else { - inferenceModel, err = llmbs.ml.BackendLoader(opts...) + inferenceModel, err = loader.BackendLoader(opts...) } if err != nil { - log.Error().Err(err).Msg("[llmbs.Inference] failed to load a backend") - return + return nil, err } - grpcPredOpts := gRPCPredictOpts(bc, llmbs.appConfig.ModelPath) - grpcPredOpts.Prompt = req.Text - grpcPredOpts.Images = req.Images - - if bc.TemplateConfig.UseTokenizerTemplate && req.Text == "" { - grpcPredOpts.UseTokenizerTemplate = true - protoMessages := make([]*proto.Message, len(req.RawMessages), len(req.RawMessages)) - for i, message := range req.RawMessages { + var protoMessages []*proto.Message + // if we are using the tokenizer template, we need to convert the messages to proto messages + // unless the prompt has already been tokenized (non-chat endpoints + functions) + if c.TemplateConfig.UseTokenizerTemplate && s == "" { + protoMessages = make([]*proto.Message, len(messages), len(messages)) + for i, message := range messages { protoMessages[i] = &proto.Message{ Role: message.Role, } @@ -129,32 +87,47 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, case string: protoMessages[i].Content = ct default: - err = fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct) - return + return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct) } } } - tokenUsage := TokenUsage{} + // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported + fn := func() (LLMResponse, error) { + opts := gRPCPredictOpts(c, loader.ModelPath) + opts.Prompt = s + opts.Messages = protoMessages + opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate + opts.Images = images - promptInfo, pErr := inferenceModel.TokenizeString(ctx, grpcPredOpts) - if pErr == nil && promptInfo.Length > 0 { - tokenUsage.Prompt = int(promptInfo.Length) - } + tokenUsage := TokenUsage{} - rawResultChannel := make(chan concurrency.ErrorOr[*LLMResponse]) - // TODO this next line is the biggest argument for taking named return values _back_ out!!! - var rawTokenChannel chan concurrency.ErrorOr[*LLMResponse] + // check the per-model feature flag for usage, since tokenCallback may have a cost. + // Defaults to off as for now it is still experimental + if c.FeatureFlag.Enabled("usage") { + userTokenCallback := tokenCallback + if userTokenCallback == nil { + userTokenCallback = func(token string, usage TokenUsage) bool { + return true + } + } - if enableTokenChannel { - rawTokenChannel = make(chan concurrency.ErrorOr[*LLMResponse]) + promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts) + if pErr == nil && promptInfo.Length > 0 { + tokenUsage.Prompt = int(promptInfo.Length) + } - // TODO Needs better name - ss := "" + tokenCallback = func(token string, usage TokenUsage) bool { + tokenUsage.Completion++ + return userTokenCallback(token, tokenUsage) + } + } + + if tokenCallback != nil { + ss := "" - go func() { var partialRune []byte - err := inferenceModel.PredictStream(ctx, grpcPredOpts, func(chars []byte) { + err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { partialRune = append(partialRune, chars...) 
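				// Buffer the incoming bytes until they form at least one complete UTF-8 rune before emitting tokens.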
for len(partialRune) > 0 { @@ -164,126 +137,54 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest, break } - tokenUsage.Completion++ - rawTokenChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ - Response: string(r), - Usage: tokenUsage, - }} - + tokenCallback(string(r), tokenUsage) ss += string(r) partialRune = partialRune[size:] } }) - close(rawTokenChannel) + return LLMResponse{ + Response: ss, + Usage: tokenUsage, + }, err + } else { + // TODO: Is the chicken bit the only way to get here? is that acceptable? + reply, err := inferenceModel.Predict(ctx, opts) if err != nil { - rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} - } else { - rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ - Response: ss, - Usage: tokenUsage, - }} + return LLMResponse{}, err } - close(rawResultChannel) - }() - } else { - go func() { - reply, err := inferenceModel.Predict(ctx, grpcPredOpts) if tokenUsage.Prompt == 0 { tokenUsage.Prompt = int(reply.PromptTokens) } if tokenUsage.Completion == 0 { tokenUsage.Completion = int(reply.Tokens) } - if err != nil { - rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err} - close(rawResultChannel) - } else { - rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Value: &LLMResponse{ - Response: string(reply.Message), - Usage: tokenUsage, - }} - close(rawResultChannel) - } - }() + return LLMResponse{ + Response: string(reply.Message), + Usage: tokenUsage, + }, err + } } - resultChannel = rawResultChannel - tokenChannel = rawTokenChannel - return + return fn, nil } -// TODO: Should predInput be a seperate param still, or should this fn handle extracting it from request?? -func (llmbs *LLMBackendService) GenerateText(predInput string, request *schema.OpenAIRequest, bc *config.BackendConfig, - mappingFn func(*LLMResponse) schema.Choice, enableCompletionChannels bool, enableTokenChannels bool) ( - // Returns: - resultChannel <-chan concurrency.ErrorOr[*LLMResponseBundle], completionChannels []<-chan concurrency.ErrorOr[*LLMResponse], tokenChannels []<-chan concurrency.ErrorOr[*LLMResponse], err error) { +var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp) +var mu sync.Mutex = sync.Mutex{} - rawChannel := make(chan concurrency.ErrorOr[*LLMResponseBundle]) - resultChannel = rawChannel - - if request.N == 0 { // number of completions to return - request.N = 1 - } - images := []string{} - for _, m := range request.Messages { - images = append(images, m.StringImages...) - } - - for i := 0; i < request.N; i++ { - - individualResultChannel, tokenChannel, infErr := llmbs.Inference(request.Context, &LLMRequest{ - Text: predInput, - Images: images, - RawMessages: request.Messages, - }, bc, enableTokenChannels) - if infErr != nil { - err = infErr // Avoids complaints about redeclaring err but looks dumb - return - } - completionChannels = append(completionChannels, individualResultChannel) - tokenChannels = append(tokenChannels, tokenChannel) - } - - go func() { - initialBundle := LLMResponseBundle{ - Request: request, - Response: []schema.Choice{}, - Usage: TokenUsage{}, - } - - wg := concurrency.SliceOfChannelsReducer(completionChannels, rawChannel, func(iv concurrency.ErrorOr[*LLMResponse], ov concurrency.ErrorOr[*LLMResponseBundle]) concurrency.ErrorOr[*LLMResponseBundle] { - if iv.Error != nil { - ov.Error = iv.Error - // TODO: Decide if we should wipe partials or not? 
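	// ---[ editor annotation: hedged sketch, not part of the patch ]---
	// The channel/reducer plumbing removed here is replaced by a plain
	// closure: ModelInference now returns a func() (LLMResponse, error) that
	// the caller invokes once per completion. A sketch of the new calling
	// convention (variable names are illustrative):
	//
	//	fn, err := backend.ModelInference(ctx, prompt, messages, images, ml, cfg, appCfg,
	//		func(token string, usage backend.TokenUsage) bool {
	//			fmt.Print(token) // stream tokens as they arrive
	//			return true      // per the callback contract: keep going
	//		})
	//	if err != nil {
	//		return err
	//	}
	//	resp, err := fn() // blocks until the completion finishes
	// ---[ end annotation ]---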
- return ov - } - ov.Value.Usage.Prompt += iv.Value.Usage.Prompt - ov.Value.Usage.Completion += iv.Value.Usage.Completion - - ov.Value.Response = append(ov.Value.Response, mappingFn(iv.Value)) - return ov - }, concurrency.ErrorOr[*LLMResponseBundle]{Value: &initialBundle}, true) - wg.Wait() - - }() - - return -} - -func (llmbs *LLMBackendService) Finetune(config config.BackendConfig, input, prediction string) string { +func Finetune(config config.BackendConfig, input, prediction string) string { if config.Echo { prediction = input + prediction } for _, c := range config.Cutstrings { - llmbs.ftMutex.Lock() - reg, ok := llmbs.cutstrings[c] + mu.Lock() + reg, ok := cutstrings[c] if !ok { - llmbs.cutstrings[c] = regexp.MustCompile(c) - reg = llmbs.cutstrings[c] + cutstrings[c] = regexp.MustCompile(c) + reg = cutstrings[c] } - llmbs.ftMutex.Unlock() + mu.Unlock() prediction = reg.ReplaceAllString(prediction, "") } diff --git a/core/backend/options.go b/core/backend/options.go index 0b4e56db..5b303b05 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -10,7 +10,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" ) -func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { +func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { if so.SingleBackend { opts = append(opts, model.WithSingleActiveBackend()) } @@ -19,12 +19,12 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo opts = append(opts, model.EnableParallelRequests) } - if bc.GRPC.Attempts != 0 { - opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts)) + if c.GRPC.Attempts != 0 { + opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts)) } - if bc.GRPC.AttemptsSleepTime != 0 { - opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime)) + if c.GRPC.AttemptsSleepTime != 0 { + opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) } for k, v := range so.ExternalGRPCBackends { @@ -34,7 +34,7 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo return opts } -func getSeed(c *config.BackendConfig) int32 { +func getSeed(c config.BackendConfig) int32 { seed := int32(*c.Seed) if seed == config.RAND_SEED { seed = rand.Int31() @@ -43,7 +43,7 @@ func getSeed(c *config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions { +func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch @@ -104,47 +104,47 @@ func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions { } } -func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions { +func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { promptCachePath := "" - if bc.PromptCachePath != "" { - p := filepath.Join(modelPath, bc.PromptCachePath) + if c.PromptCachePath != "" { + p := filepath.Join(modelPath, c.PromptCachePath) os.MkdirAll(filepath.Dir(p), 0755) promptCachePath = p } return &pb.PredictOptions{ - Temperature: float32(*bc.Temperature), - TopP: float32(*bc.TopP), - NDraft: bc.NDraft, - TopK: int32(*bc.TopK), - Tokens: int32(*bc.Maxtokens), - Threads: int32(*bc.Threads), - PromptCacheAll: bc.PromptCacheAll, - PromptCacheRO: bc.PromptCacheRO, + Temperature: float32(*c.Temperature), + TopP: float32(*c.TopP), + NDraft: c.NDraft, + TopK: int32(*c.TopK), + Tokens: int32(*c.Maxtokens), + Threads: int32(*c.Threads), + 
PromptCacheAll: c.PromptCacheAll, + PromptCacheRO: c.PromptCacheRO, PromptCachePath: promptCachePath, - F16KV: *bc.F16, - DebugMode: *bc.Debug, - Grammar: bc.Grammar, - NegativePromptScale: bc.NegativePromptScale, - RopeFreqBase: bc.RopeFreqBase, - RopeFreqScale: bc.RopeFreqScale, - NegativePrompt: bc.NegativePrompt, - Mirostat: int32(*bc.LLMConfig.Mirostat), - MirostatETA: float32(*bc.LLMConfig.MirostatETA), - MirostatTAU: float32(*bc.LLMConfig.MirostatTAU), - Debug: *bc.Debug, - StopPrompts: bc.StopWords, - Repeat: int32(bc.RepeatPenalty), - NKeep: int32(bc.Keep), - Batch: int32(bc.Batch), - IgnoreEOS: bc.IgnoreEOS, - Seed: getSeed(bc), - FrequencyPenalty: float32(bc.FrequencyPenalty), - MLock: *bc.MMlock, - MMap: *bc.MMap, - MainGPU: bc.MainGPU, - TensorSplit: bc.TensorSplit, - TailFreeSamplingZ: float32(*bc.TFZ), - TypicalP: float32(*bc.TypicalP), + F16KV: *c.F16, + DebugMode: *c.Debug, + Grammar: c.Grammar, + NegativePromptScale: c.NegativePromptScale, + RopeFreqBase: c.RopeFreqBase, + RopeFreqScale: c.RopeFreqScale, + NegativePrompt: c.NegativePrompt, + Mirostat: int32(*c.LLMConfig.Mirostat), + MirostatETA: float32(*c.LLMConfig.MirostatETA), + MirostatTAU: float32(*c.LLMConfig.MirostatTAU), + Debug: *c.Debug, + StopPrompts: c.StopWords, + Repeat: int32(c.RepeatPenalty), + NKeep: int32(c.Keep), + Batch: int32(c.Batch), + IgnoreEOS: c.IgnoreEOS, + Seed: getSeed(c), + FrequencyPenalty: float32(c.FrequencyPenalty), + MLock: *c.MMlock, + MMap: *c.MMap, + MainGPU: c.MainGPU, + TensorSplit: c.TensorSplit, + TailFreeSamplingZ: float32(*c.TFZ), + TypicalP: float32(*c.TypicalP), } } diff --git a/core/backend/transcript.go b/core/backend/transcript.go index 6761c2ac..4c3859df 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -7,48 +7,11 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" ) -type TranscriptionBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig -} - -func NewTranscriptionBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TranscriptionBackendService { - return &TranscriptionBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, - } -} - -func (tbs *TranscriptionBackendService) Transcribe(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.TranscriptionResult] { - responseChannel := make(chan concurrency.ErrorOr[*schema.TranscriptionResult]) - go func(request *schema.OpenAIRequest) { - bc, request, err := tbs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, tbs.appConfig) - if err != nil { - responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: fmt.Errorf("failed reading parameters from request:%w", err)} - close(responseChannel) - return - } - - tr, err := modelTranscription(request.File, request.Language, tbs.ml, bc, tbs.appConfig) - if err != nil { - responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Error: err} - close(responseChannel) - return - } - responseChannel <- concurrency.ErrorOr[*schema.TranscriptionResult]{Value: tr} - close(responseChannel) - }(request) - return responseChannel -} - -func modelTranscription(audio, language string, ml *model.ModelLoader, backendConfig *config.BackendConfig, appConfig 
*config.ApplicationConfig) (*schema.TranscriptionResult, error) { +func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) { opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(model.WhisperBackend), diff --git a/core/backend/tts.go b/core/backend/tts.go index d1fa270d..f97b6202 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -7,60 +7,29 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/concurrency" "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/LocalAI/pkg/model" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/utils" ) -type TextToSpeechBackendService struct { - ml *model.ModelLoader - bcl *config.BackendConfigLoader - appConfig *config.ApplicationConfig -} +func generateUniqueFileName(dir, baseName, ext string) string { + counter := 1 + fileName := baseName + ext -func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService { - return &TextToSpeechBackendService{ - ml: ml, - bcl: bcl, - appConfig: appConfig, + for { + filePath := filepath.Join(dir, fileName) + _, err := os.Stat(filePath) + if os.IsNotExist(err) { + return fileName + } + + counter++ + fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) } } -func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] { - responseChannel := make(chan concurrency.ErrorOr[*string]) - go func(request *schema.TTSRequest) { - cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath, - config.LoadOptionDebug(ttsbs.appConfig.Debug), - config.LoadOptionThreads(ttsbs.appConfig.Threads), - config.LoadOptionContextSize(ttsbs.appConfig.ContextSize), - config.LoadOptionF16(ttsbs.appConfig.F16), - ) - if err != nil { - responseChannel <- concurrency.ErrorOr[*string]{Error: err} - close(responseChannel) - return - } - - if request.Backend != "" { - cfg.Backend = request.Backend - } - - outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg) - if err != nil { - responseChannel <- concurrency.ErrorOr[*string]{Error: err} - close(responseChannel) - return - } - responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile} - close(responseChannel) - }(request) - return responseChannel -} - -func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) { +func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -68,7 +37,7 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model grpcOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{ + opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(bb), model.WithModel(modelFile), model.WithContext(appConfig.Context), @@ -118,19 +87,3 @@ func modelTTS(backend, text, modelFile string, voice string, loader *model.Model return 
filePath, res, err } - -func generateUniqueFileName(dir, baseName, ext string) string { - counter := 1 - fileName := baseName + ext - - for { - filePath := filepath.Join(dir, fileName) - _, err := os.Stat(filePath) - if os.IsNotExist(err) { - return fileName - } - - counter++ - fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext) - } -} diff --git a/core/cli/run.go b/core/cli/run.go index cafc0b54..0f3ba2de 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -124,11 +124,11 @@ func (r *RunCMD) Run(ctx *Context) error { } if r.PreloadBackendOnly { - _, err := startup.Startup(opts...) + _, _, _, err := startup.Startup(opts...) return err } - application, err := startup.Startup(opts...) + cl, ml, options, err := startup.Startup(opts...) if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) @@ -137,7 +137,7 @@ func (r *RunCMD) Run(ctx *Context) error { // Watch the configuration directory // If the directory does not exist, we don't watch it if _, err := os.Stat(r.LocalaiConfigDir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, application.ApplicationConfig) + closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) defer closeConfigWatcherFn() if err != nil { @@ -145,7 +145,7 @@ func (r *RunCMD) Run(ctx *Context) error { } } - appHTTP, err := http.App(application) + appHTTP, err := http.App(cl, ml, options) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/cli/transcript.go b/core/cli/transcript.go index f14a1a87..9f36a77c 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -7,7 +7,6 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -44,21 +43,11 @@ func (t *TranscriptCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - tbs := backend.NewTranscriptionBackendService(ml, cl, opts) - - resultChannel := tbs.Transcribe(&schema.OpenAIRequest{ - PredictionOptions: schema.PredictionOptions{ - Language: t.Language, - }, - File: t.Filename, - }) - - r := <-resultChannel - - if r.Error != nil { - return r.Error + tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) + if err != nil { + return err } - for _, segment := range r.Value.Segments { + for _, segment := range tr.Segments { fmt.Println(segment.Start.String(), "-", segment.Text) } return nil diff --git a/core/cli/tts.go b/core/cli/tts.go index c7758c48..1d8fd3a3 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -9,7 +9,6 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" ) @@ -43,29 +42,20 @@ func (t *TTSCMD) Run(ctx *Context) error { defer ml.StopAllGRPC() - ttsbs := backend.NewTextToSpeechBackendService(ml, config.NewBackendConfigLoader(), opts) + options := config.BackendConfig{} + options.SetDefaults() - request := &schema.TTSRequest{ - Model: t.Model, - Input: text, - Backend: t.Backend, - Voice: t.Voice, - } - - resultsChannel := ttsbs.TextToAudioFile(request) - - rawResult := <-resultsChannel - - if rawResult.Error != nil { - return rawResult.Error + filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) + if err != nil { + return err } if outputFile != "" { - if err := os.Rename(*rawResult.Value, outputFile); err != 
nil {
+		if err := os.Rename(filePath, outputFile); err != nil {
 			return err
 		}
-		fmt.Printf("Generated file %q\n", outputFile)
+		fmt.Printf("Generated file %s\n", outputFile)
 	} else {
-		fmt.Printf("Generated file %q\n", *rawResult.Value)
+		fmt.Printf("Generated file %s\n", filePath)
 	}
 	return nil
 }
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 47e4829d..81c92d01 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -1,7 +1,22 @@
 package config
 
 import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+
 	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/rs/zerolog/log"
+	"gopkg.in/yaml.v3"
+
+	"github.com/charmbracelet/glamour"
 )
 
 const (
@@ -184,7 +199,7 @@ func (c *BackendConfig) FunctionToCall() string {
 }
 
 func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
-	lo := &ConfigLoaderOptions{}
+	lo := &LoadOptions{}
 	lo.Apply(opts...)
 
 	ctx := lo.ctxSize
@@ -297,3 +312,287 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.Debug = &trueV
 	}
 }
+
+////// Config Loader ////////
+
+type BackendConfigLoader struct {
+	configs map[string]BackendConfig
+	sync.Mutex
+}
+
+type LoadOptions struct {
+	debug            bool
+	threads, ctxSize int
+	f16              bool
+}
+
+func LoadOptionDebug(debug bool) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.debug = debug
+	}
+}
+
+func LoadOptionThreads(threads int) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.threads = threads
+	}
+}
+
+func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.ctxSize = ctxSize
+	}
+}
+
+func LoadOptionF16(f16 bool) ConfigLoaderOption {
+	return func(o *LoadOptions) {
+		o.f16 = f16
+	}
+}
+
+type ConfigLoaderOption func(*LoadOptions)
+
+func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
+	for _, l := range options {
+		l(lo)
+	}
+}
+
+// Load a config file for a model
+func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
+
+	// Load a config file if present after the model name
+	cfg := &BackendConfig{
+		PredictionOptions: schema.PredictionOptions{
+			Model: modelName,
+		},
+	}
+
+	cfgExisting, exists := cl.GetBackendConfig(modelName)
+	if exists {
+		cfg = &cfgExisting
+	} else {
+		// Try loading a model config file
+		modelConfig := filepath.Join(modelPath, modelName+".yaml")
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cl.LoadBackendConfig(
+				modelConfig, opts...,
+			); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cl.GetBackendConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			}
+		}
+	}
+
+	cfg.SetDefaults(opts...)
+
+	return cfg, nil
+}
+
+func NewBackendConfigLoader() *BackendConfigLoader {
+	return &BackendConfigLoader{
+		configs: make(map[string]BackendConfig),
+	}
+}
+func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+	c := &[]*BackendConfig{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	for _, cc := range *c {
+		cc.SetDefaults(opts...)
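	// ---[ editor annotation: hedged sketch, not part of the patch ]---
	// ReadBackendConfigFile parses a single YAML file holding a *list* of
	// model configs and applies defaults to each entry. A hypothetical file
	// and caller; treat the exact YAML keys as an assumption:
	//
	//	// models.yaml:
	//	//   - name: gpt-3.5-turbo
	//	//     backend: llama-cpp
	//	//     parameters:
	//	//       model: ggml-model-q4_0.bin
	//	//   - name: whisper-1
	//	//     backend: whisper
	//
	//	configs, err := config.ReadBackendConfigFile("/models/models.yaml")
	//	if err != nil {
	//		return err
	//	}
	//	for _, cfg := range configs {
	//		fmt.Println(cfg.Name)
	//	}
	// ---[ end annotation ]---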
+ } + + return *c, nil +} + +func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + lo := &LoadOptions{} + lo.Apply(opts...) + + c := &BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + c.SetDefaults(opts...) + return c, nil +} + +func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + c, err := ReadBackendConfigFile(file, opts...) + if err != nil { + return fmt.Errorf("cannot load config file: %w", err) + } + + for _, cc := range c { + cm.configs[cc.Name] = *cc + } + return nil +} + +func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { + cl.Lock() + defer cl.Unlock() + c, err := ReadBackendConfig(file, opts...) + if err != nil { + return fmt.Errorf("cannot read config file: %w", err) + } + + cl.configs[c.Name] = *c + return nil +} + +func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { + cl.Lock() + defer cl.Unlock() + v, exists := cl.configs[m] + return v, exists +} + +func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { + cl.Lock() + defer cl.Unlock() + var res []BackendConfig + for _, v := range cl.configs { + res = append(res, v) + } + + sort.SliceStable(res, func(i, j int) bool { + return res[i].Name < res[j].Name + }) + + return res +} + +func (cl *BackendConfigLoader) ListBackendConfigs() []string { + cl.Lock() + defer cl.Unlock() + var res []string + for k := range cl.configs { + res = append(res, k) + } + return res +} + +// Preload prepare models if they are not local but url or huggingface repositories +func (cl *BackendConfigLoader) Preload(modelPath string) error { + cl.Lock() + defer cl.Unlock() + + status := func(fileName, current, total string, percent float64) { + utils.DisplayDownloadFunction(fileName, current, total, percent) + } + + log.Info().Msgf("Preloading models from %s", modelPath) + + renderMode := "dark" + if os.Getenv("COLOR") != "" { + renderMode = os.Getenv("COLOR") + } + + glamText := func(t string) { + out, err := glamour.Render(t, renderMode) + if err == nil && os.Getenv("NO_COLOR") == "" { + fmt.Println(out) + } else { + fmt.Println(t) + } + } + + for i, config := range cl.configs { + + // Download files and verify their SHA + for _, file := range config.DownloadFiles { + log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) + + if err := utils.VerifyPath(file.Filename, modelPath); err != nil { + return err + } + // Create file path + filePath := filepath.Join(modelPath, file.Filename) + + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { + return err + } + } + + modelURL := config.PredictionOptions.Model + modelURL = downloader.ConvertURL(modelURL) + + if downloader.LooksLikeURL(modelURL) { + // md5 of model name + md5Name := utils.MD5(modelURL) + + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.PredictionOptions.Model = md5Name + cl.configs[i] = *c + } + if cl.configs[i].Name != "" { + glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) + } + if 
cl.configs[i].Description != "" { + //glamText("**Description**") + glamText(cl.configs[i].Description) + } + if cl.configs[i].Usage != "" { + //glamText("**Usage**") + glamText(cl.configs[i].Usage) + } + } + return nil +} + +// LoadBackendConfigsFromPath reads all the configurations of the models from a path +// (non-recursive) +func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + entries, err := os.ReadDir(path) + if err != nil { + return err + } + files := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return err + } + files = append(files, info) + } + for _, file := range files { + // Skip templates, YAML and .keep files + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { + continue + } + c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) + if err == nil { + cm.configs[c.Name] = *c + } + } + + return nil +} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go deleted file mode 100644 index 62dfc1e0..00000000 --- a/core/config/backend_config_loader.go +++ /dev/null @@ -1,509 +0,0 @@ -package config - -import ( - "encoding/json" - "errors" - "fmt" - "io/fs" - "os" - "path/filepath" - "sort" - "strings" - "sync" - - "github.com/charmbracelet/glamour" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/downloader" - "github.com/go-skynet/LocalAI/pkg/grammar" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" - "gopkg.in/yaml.v2" -) - -type BackendConfigLoader struct { - configs map[string]BackendConfig - sync.Mutex -} - -type ConfigLoaderOptions struct { - debug bool - threads, ctxSize int - f16 bool -} - -func LoadOptionDebug(debug bool) ConfigLoaderOption { - return func(o *ConfigLoaderOptions) { - o.debug = debug - } -} - -func LoadOptionThreads(threads int) ConfigLoaderOption { - return func(o *ConfigLoaderOptions) { - o.threads = threads - } -} - -func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { - return func(o *ConfigLoaderOptions) { - o.ctxSize = ctxSize - } -} - -func LoadOptionF16(f16 bool) ConfigLoaderOption { - return func(o *ConfigLoaderOptions) { - o.f16 = f16 - } -} - -type ConfigLoaderOption func(*ConfigLoaderOptions) - -func (lo *ConfigLoaderOptions) Apply(options ...ConfigLoaderOption) { - for _, l := range options { - l(lo) - } -} - -func NewBackendConfigLoader() *BackendConfigLoader { - return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), - } -} - -func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { - bcl.Lock() - defer bcl.Unlock() - c, err := readBackendConfig(file, opts...) 
- if err != nil { - return fmt.Errorf("cannot read config file: %w", err) - } - - bcl.configs[c.Name] = *c - return nil -} - -func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { - bcl.Lock() - defer bcl.Unlock() - v, exists := bcl.configs[m] - return v, exists -} - -func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { - bcl.Lock() - defer bcl.Unlock() - var res []BackendConfig - for _, v := range bcl.configs { - res = append(res, v) - } - sort.SliceStable(res, func(i, j int) bool { - return res[i].Name < res[j].Name - }) - return res -} - -func (bcl *BackendConfigLoader) ListBackendConfigs() []string { - bcl.Lock() - defer bcl.Unlock() - var res []string - for k := range bcl.configs { - res = append(res, k) - } - return res -} - -// Preload prepare models if they are not local but url or huggingface repositories -func (bcl *BackendConfigLoader) Preload(modelPath string) error { - bcl.Lock() - defer bcl.Unlock() - - status := func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - } - - log.Info().Msgf("Preloading models from %s", modelPath) - - renderMode := "dark" - if os.Getenv("COLOR") != "" { - renderMode = os.Getenv("COLOR") - } - - glamText := func(t string) { - out, err := glamour.Render(t, renderMode) - if err == nil && os.Getenv("NO_COLOR") == "" { - fmt.Println(out) - } else { - fmt.Println(t) - } - } - - for i, config := range bcl.configs { - - // Download files and verify their SHA - for _, file := range config.DownloadFiles { - log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) - - if err := utils.VerifyPath(file.Filename, modelPath); err != nil { - return err - } - // Create file path - filePath := filepath.Join(modelPath, file.Filename) - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { - return err - } - } - - modelURL := config.PredictionOptions.Model - modelURL = downloader.ConvertURL(modelURL) - - if downloader.LooksLikeURL(modelURL) { - // md5 of model name - md5Name := utils.MD5(modelURL) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) - if err != nil { - return err - } - } - - cc := bcl.configs[i] - c := &cc - c.PredictionOptions.Model = md5Name - bcl.configs[i] = *c - } - if bcl.configs[i].Name != "" { - glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name)) - } - if bcl.configs[i].Description != "" { - //glamText("**Description**") - glamText(bcl.configs[i].Description) - } - if bcl.configs[i].Usage != "" { - //glamText("**Usage**") - glamText(bcl.configs[i].Usage) - } - } - return nil -} - -func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { - bcl.Lock() - defer bcl.Unlock() - entries, err := os.ReadDir(path) - if err != nil { - return err - } - files := make([]fs.FileInfo, 0, len(entries)) - for _, entry := range entries { - info, err := entry.Info() - if err != nil { - return err - } - files = append(files, info) - } - for _, file := range files { - // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { - continue - } - c, err := readBackendConfig(filepath.Join(path, file.Name()), opts...) 
- if err == nil { - bcl.configs[c.Name] = *c - } - } - - return nil -} - -func (bcl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { - bcl.Lock() - defer bcl.Unlock() - c, err := readBackendConfigFile(file, opts...) - if err != nil { - return fmt.Errorf("cannot load config file: %w", err) - } - - for _, cc := range c { - bcl.configs[cc.Name] = *cc - } - return nil -} - -////////// - -// Load a config file for a model -func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName string, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - - // Load a config file if present after the model name - cfg := &BackendConfig{ - PredictionOptions: schema.PredictionOptions{ - Model: modelName, - }, - } - - cfgExisting, exists := bcl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } else { - // Load a config file if present after the model name - modelConfig := filepath.Join(modelPath, modelName+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := bcl.LoadBackendConfig(modelConfig); err != nil { - return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = bcl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } - } - } - - cfg.SetDefaults(opts...) - return cfg, nil -} - -func readBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - c := &[]*BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - for _, cc := range *c { - cc.SetDefaults(opts...) - } - - return *c, nil -} - -func readBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - c := &BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - c.SetDefaults(opts...) 
- return c, nil -} - -func (bcl *BackendConfigLoader) LoadBackendConfigForModelAndOpenAIRequest(modelFile string, input *schema.OpenAIRequest, appConfig *ApplicationConfig) (*BackendConfig, *schema.OpenAIRequest, error) { - cfg, err := bcl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, - LoadOptionContextSize(appConfig.ContextSize), - LoadOptionDebug(appConfig.Debug), - LoadOptionF16(appConfig.F16), - LoadOptionThreads(appConfig.Threads), - ) - - // Set the parameters for the language model prediction - updateBackendConfigFromOpenAIRequest(cfg, input) - - return cfg, input, err -} - -func updateBackendConfigFromOpenAIRequest(bc *BackendConfig, request *schema.OpenAIRequest) { - if request.Echo { - bc.Echo = request.Echo - } - if request.TopK != nil && *request.TopK != 0 { - bc.TopK = request.TopK - } - if request.TopP != nil && *request.TopP != 0 { - bc.TopP = request.TopP - } - - if request.Backend != "" { - bc.Backend = request.Backend - } - - if request.ClipSkip != 0 { - bc.Diffusers.ClipSkip = request.ClipSkip - } - - if request.ModelBaseName != "" { - bc.AutoGPTQ.ModelBaseName = request.ModelBaseName - } - - if request.NegativePromptScale != 0 { - bc.NegativePromptScale = request.NegativePromptScale - } - - if request.UseFastTokenizer { - bc.UseFastTokenizer = request.UseFastTokenizer - } - - if request.NegativePrompt != "" { - bc.NegativePrompt = request.NegativePrompt - } - - if request.RopeFreqBase != 0 { - bc.RopeFreqBase = request.RopeFreqBase - } - - if request.RopeFreqScale != 0 { - bc.RopeFreqScale = request.RopeFreqScale - } - - if request.Grammar != "" { - bc.Grammar = request.Grammar - } - - if request.Temperature != nil && *request.Temperature != 0 { - bc.Temperature = request.Temperature - } - - if request.Maxtokens != nil && *request.Maxtokens != 0 { - bc.Maxtokens = request.Maxtokens - } - - switch stop := request.Stop.(type) { - case string: - if stop != "" { - bc.StopWords = append(bc.StopWords, stop) - } - case []interface{}: - for _, pp := range stop { - if s, ok := pp.(string); ok { - bc.StopWords = append(bc.StopWords, s) - } - } - } - - if len(request.Tools) > 0 { - for _, tool := range request.Tools { - request.Functions = append(request.Functions, tool.Function) - } - } - - if request.ToolsChoice != nil { - var toolChoice grammar.Tool - switch content := request.ToolsChoice.(type) { - case string: - _ = json.Unmarshal([]byte(content), &toolChoice) - case map[string]interface{}: - dat, _ := json.Marshal(content) - _ = json.Unmarshal(dat, &toolChoice) - } - request.FunctionCall = map[string]interface{}{ - "name": toolChoice.Function.Name, - } - } - - // Decode each request's message content - index := 0 - for i, m := range request.Messages { - switch content := m.Content.(type) { - case string: - request.Messages[i].StringContent = content - case []interface{}: - dat, _ := json.Marshal(content) - c := []schema.Content{} - json.Unmarshal(dat, &c) - for _, pp := range c { - if pp.Type == "text" { - request.Messages[i].StringContent = pp.Text - } else if pp.Type == "image_url" { - // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: - base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL) - if err == nil { - request.Messages[i].StringImages = append(request.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff - // set a placeholder for each image - request.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + request.Messages[i].StringContent - index++ - } else { - 
fmt.Print("Failed encoding image", err) - } - } - } - } - } - - if request.RepeatPenalty != 0 { - bc.RepeatPenalty = request.RepeatPenalty - } - - if request.FrequencyPenalty != 0 { - bc.FrequencyPenalty = request.FrequencyPenalty - } - - if request.PresencePenalty != 0 { - bc.PresencePenalty = request.PresencePenalty - } - - if request.Keep != 0 { - bc.Keep = request.Keep - } - - if request.Batch != 0 { - bc.Batch = request.Batch - } - - if request.IgnoreEOS { - bc.IgnoreEOS = request.IgnoreEOS - } - - if request.Seed != nil { - bc.Seed = request.Seed - } - - if request.TypicalP != nil { - bc.TypicalP = request.TypicalP - } - - switch inputs := request.Input.(type) { - case string: - if inputs != "" { - bc.InputStrings = append(bc.InputStrings, inputs) - } - case []interface{}: - for _, pp := range inputs { - switch i := pp.(type) { - case string: - bc.InputStrings = append(bc.InputStrings, i) - case []interface{}: - tokens := []int{} - for _, ii := range i { - tokens = append(tokens, int(ii.(float64))) - } - bc.InputToken = append(bc.InputToken, tokens) - } - } - } - - // Can be either a string or an object - switch fnc := request.FunctionCall.(type) { - case string: - if fnc != "" { - bc.SetFunctionCallString(fnc) - } - case map[string]interface{}: - var name string - n, exists := fnc["name"] - if exists { - nn, e := n.(string) - if e { - name = nn - } - } - bc.SetFunctionCallNameString(name) - } - - switch p := request.Prompt.(type) { - case string: - bc.PromptStrings = append(bc.PromptStrings, p) - case []interface{}: - for _, pp := range p { - if s, ok := pp.(string); ok { - bc.PromptStrings = append(bc.PromptStrings, s) - } - } - } -} diff --git a/core/config/exports_test.go b/core/config/exports_test.go deleted file mode 100644 index 70ba84e6..00000000 --- a/core/config/exports_test.go +++ /dev/null @@ -1,6 +0,0 @@ -package config - -// This file re-exports private functions to be used directly in unit tests. -// Since this file's name ends in _test.go, theoretically these should not be exposed past the tests. 
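	// ---[ editor annotation: not part of the patch ]---
	// The deleted exports_test.go used a standard Go trick: a _test.go file
	// inside the package can alias unexported identifiers so that external
	// test packages can reach them, roughly:
	//
	//	package config
	//
	//	// compiled only during tests, because the file name ends in _test.go
	//	var ReadBackendConfigFile = readBackendConfigFile
	//
	// It is dropped because readBackendConfigFile is promoted to the exported
	// ReadBackendConfigFile in backend_config.go above.
	// ---[ end annotation ]---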
- -var ReadBackendConfigFile = readBackendConfigFile diff --git a/core/http/api.go b/core/http/api.go index 7094899a..af38512a 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -1,20 +1,23 @@ package http import ( + "encoding/json" "errors" + "os" "strings" - "github.com/go-skynet/LocalAI/core" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" @@ -52,12 +55,13 @@ func readAuthHeader(c *fiber.Ctx) string { // @securityDefinitions.apikey BearerAuth // @in header // @name Authorization -func App(application *core.Application) (*fiber.App, error) { + +func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ Views: renderEngine(), - BodyLimit: application.ApplicationConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: application.ApplicationConfig.DisableMessage, + BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + DisableStartupMessage: appConfig.DisableMessage, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -78,7 +82,7 @@ func App(application *core.Application) (*fiber.App, error) { }, }) - if application.ApplicationConfig.Debug { + if appConfig.Debug { app.Use(logger.New(logger.Config{ Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", })) @@ -86,7 +90,7 @@ func App(application *core.Application) (*fiber.App, error) { // Default middleware config - if !application.ApplicationConfig.Debug { + if !appConfig.Debug { app.Use(recover.New()) } @@ -104,7 +108,25 @@ func App(application *core.Application) (*fiber.App, error) { // Auth middleware checking if API key is valid. If no API key is set, no auth is required. auth := func(c *fiber.Ctx) error { - if len(application.ApplicationConfig.ApiKeys) == 0 { + if len(appConfig.ApiKeys) == 0 { + return c.Next() + } + + // Check for api_keys.json file + fileContent, err := os.ReadFile("api_keys.json") + if err == nil { + // Parse JSON content from the file + var fileKeys []string + err := json.Unmarshal(fileContent, &fileKeys) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) + } + + // Add file keys to options.ApiKeys + appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
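	// ---[ editor annotation: hedged sketch, not part of the patch ]---
	// Note that the middleware above re-reads api_keys.json and appends its
	// keys to appConfig.ApiKeys on every request, so the slice keeps growing;
	// loading the file once would avoid that. A sketch under that assumption
	// (the sync.Once would live outside the handler closure):
	//
	//	var loadFileKeys sync.Once
	//
	//	loadFileKeys.Do(func() {
	//		if fileContent, err := os.ReadFile("api_keys.json"); err == nil {
	//			var fileKeys []string // expected file shape: ["sk-one", "sk-two"]
	//			if json.Unmarshal(fileContent, &fileKeys) == nil {
	//				appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
	//			}
	//		}
	//	})
	// ---[ end annotation ]---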
+	}
+
+	if len(appConfig.ApiKeys) == 0 {
 			return c.Next()
 		}
 
@@ -120,7 +142,7 @@ func App(application *core.Application) (*fiber.App, error) {
 		}
 		apiKey := authHeaderParts[1]
 
-		for _, key := range application.ApplicationConfig.ApiKeys {
+		for _, key := range appConfig.ApiKeys {
 			if apiKey == key {
 				return c.Next()
 			}
@@ -129,22 +151,20 @@ func App(application *core.Application) (*fiber.App, error) {
 		return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
 	}
 
-	if application.ApplicationConfig.CORS {
+	if appConfig.CORS {
 		var c func(ctx *fiber.Ctx) error
-		if application.ApplicationConfig.CORSAllowOrigins == "" {
+		if appConfig.CORSAllowOrigins == "" {
 			c = cors.New()
 		} else {
-			c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig.CORSAllowOrigins})
+			c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
 		}
 
 		app.Use(c)
 	}
 
-	fiberContextExtractor := fiberContext.NewFiberContextExtractor(application.ModelLoader, application.ApplicationConfig)
-
-	// LocalAI API endpoints
-	galleryService := services.NewGalleryService(application.ApplicationConfig.ModelPath)
-	galleryService.Start(application.ApplicationConfig.Context, application.BackendConfigLoader)
+	// LocalAI API endpoints
+	galleryService := services.NewGalleryService(appConfig.ModelPath)
+	galleryService.Start(appConfig.Context, cl)
 
 	app.Get("/version", auth, func(c *fiber.Ctx) error {
 		return c.JSON(struct {
@@ -152,17 +172,29 @@ func App(application *core.Application) (*fiber.App, error) {
 		}{Version: internal.PrintableVersion()})
 	})
 
+	// Make sure directories exist
+	os.MkdirAll(appConfig.ImageDir, 0755)
+	os.MkdirAll(appConfig.AudioDir, 0755)
+	os.MkdirAll(appConfig.UploadDir, 0755)
+	os.MkdirAll(appConfig.ConfigsDir, 0755)
+	os.MkdirAll(appConfig.ModelPath, 0755)
+
+	// Load config JSON files
+	utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
+	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
+	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+
 	app.Get("/swagger/*", swagger.HandlerDefault) // default
 
 	welcomeRoute(
 		app,
-		application.BackendConfigLoader,
-		application.ModelLoader,
-		application.ApplicationConfig,
+		cl,
+		ml,
+		appConfig,
 		auth,
 	)
 
-	modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(application.ApplicationConfig.Galleries, application.ApplicationConfig.ModelPath, galleryService)
+	modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
 	app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
 	app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
 	app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
@@ -171,85 +203,83 @@ func App(application *core.Application) (*fiber.App, error) {
 	app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
 	app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
 
-	// Stores
-	storeLoader := model.NewModelLoader("") // TODO: Investigate if this should be migrated to application and reused. Should the path be configurable? Merging for now.
- app.Post("/stores/set", auth, localai.StoresSetEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(storeLoader, application.ApplicationConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(storeLoader, application.ApplicationConfig)) - - // openAI compatible API endpoints - - // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(fiberContextExtractor, application.OpenAIService)) - - // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/edits", auth, openai.EditEndpoint(fiberContextExtractor, application.OpenAIService)) - - // assistant - // TODO: Refactor this to the new style eventually - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", 
auth, openai.DeleteAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(application.BackendConfigLoader, application.ModelLoader, application.ApplicationConfig)) - - // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(application.BackendConfigLoader, application.ApplicationConfig)) - - // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(fiberContextExtractor, application.OpenAIService)) - - // embeddings - app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(fiberContextExtractor, application.EmbeddingsBackendService)) - - // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(fiberContextExtractor, application.TranscriptionBackendService)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) - - // images - app.Post("/v1/images/generations", auth, openai.ImageEndpoint(fiberContextExtractor, application.ImageGenerationBackendService)) + app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) // Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - // LocalAI TTS? 
- app.Post("/tts", auth, localai.TTSEndpoint(fiberContextExtractor, application.TextToSpeechBackendService)) + // Stores + sl := model.NewModelLoader("") + app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) - if application.ApplicationConfig.ImageDir != "" { - app.Static("/generated-images", application.ApplicationConfig.ImageDir) + // openAI compatible API endpoint + + // chat + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + + // edit + app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + + // assistant + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + + // files + app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Get("/files/:file_id/content", auth, 
openai.GetFilesContentsEndpoint(cl, appConfig)) + + // completion + app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + + // embeddings + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + + // audio + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // images + app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) + + if appConfig.ImageDir != "" { + app.Static("/generated-images", appConfig.ImageDir) } - if application.ApplicationConfig.AudioDir != "" { - app.Static("/generated-audio", application.ApplicationConfig.AudioDir) + if appConfig.AudioDir != "" { + app.Static("/generated-audio", appConfig.AudioDir) } ok := func(c *fiber.Ctx) error { @@ -261,12 +291,13 @@ func App(application *core.Application) (*fiber.App, error) { app.Get("/readyz", ok) // Experimental Backend Statistics Module - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(application.BackendMonitorService)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(application.BackendMonitorService)) + backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) - app.Get("/models", auth, openai.ListModelsEndpoint(application.ListModelsService)) + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) diff --git a/core/http/api_test.go b/core/http/api_test.go index bf8feb1c..1553ed21 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -12,9 +12,7 @@ import ( "os" "path/filepath" "runtime" - "strings" - "github.com/go-skynet/LocalAI/core" "github.com/go-skynet/LocalAI/core/config" . "github.com/go-skynet/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/schema" @@ -207,7 +205,9 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var application *core.Application + var bcl *config.BackendConfigLoader + var ml *model.ModelLoader + var applicationConfig *config.ApplicationConfig commonOpts := []config.AppOption{ config.WithDebug(true), @@ -252,7 +252,7 @@ var _ = Describe("API test", func() { }, } - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), @@ -261,7 +261,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(backendAssetsDir))...) 
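	// ---[ editor annotation: hedged sketch, not part of the patch ]---
	// startup.Startup now hands back its collaborators directly instead of a
	// wrapped *core.Application. The wiring the tests below rely on (types
	// inferred from their uses in this diff):
	//
	//	cl, ml, appCfg, err := startup.Startup(opts...) // config loader, model loader, app config
	//	if err != nil {
	//		return err
	//	}
	//	app, err := http.App(cl, ml, appCfg) // the fiber HTTP app
	// ---[ end annotation ]---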
Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -474,11 +474,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) + Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -487,9 +487,9 @@ var _ = Describe("API test", func() { }) It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { - // if runtime.GOOS != "linux" { - // Skip("test supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } modelName := "codellama" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", @@ -504,7 +504,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "480s", "10s").Should(Equal(true)) + }, "360s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -551,13 +551,11 @@ var _ = Describe("API test", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(len(resp2.Choices)).To(Equal(1)) - fmt.Printf("\n--- %+v\n\n", resp2.Choices[0].Message) - Expect(resp2.Choices[0].Message.ToolCalls).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0]).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.ToolCalls[0].Function.Name) + Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) + Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.ToolCalls[0].Function.Arguments), &res) + err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) @@ -611,7 +609,7 @@ var _ = Describe("API test", func() { }, } - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -622,7 +620,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -726,14 +724,14 @@ var _ = 
Describe("API test", func() { var err error - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -763,11 +761,6 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions via ggml", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -775,11 +768,6 @@ var _ = Describe("API test", func() { }) It("can generate chat completions via ggml", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -787,11 +775,6 @@ var _ = Describe("API test", func() { }) It("can generate completions from model configs", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -799,11 +782,6 @@ var _ = Describe("API test", func() { }) It("can generate chat completions from model configs", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) @@ -890,9 +868,9 @@ var _ = Describe("API test", func() { Context("backends", func() { It("runs rwkv completion", func() { - // if runtime.GOOS != "linux" { - // Skip("test supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices) > 0).To(BeTrue()) @@ -913,20 +891,17 @@ var _ = Describe("API test", func() { } Expect(err).ToNot(HaveOccurred()) - - if len(response.Choices) > 0 { - text += response.Choices[0].Text - tokens++ - } + text += response.Choices[0].Text + tokens++ } Expect(text).ToNot(BeEmpty()) Expect(text).To(ContainSubstring("five")) Expect(tokens).ToNot(Or(Equal(1), Equal(0))) }) It("runs rwkv chat completion", func() { - // if runtime.GOOS != "linux" { - // Skip("test 
supported only on linux") - // } + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) Expect(err).ToNot(HaveOccurred()) @@ -1035,14 +1010,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - application, err = startup.Startup( + bcl, ml, applicationConfig, err = startup.Startup( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(application) + app, err = App(bcl, ml, applicationConfig) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -1066,33 +1041,18 @@ var _ = Describe("API test", func() { } }) It("can generate chat completions from config file (list1)", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate edit completions from config file", func() { - bt, ok := os.LookupEnv("BUILD_TYPE") - if ok && strings.ToLower(bt) == "metal" { - Skip("GGML + Metal is known flaky, skip test temporarily") - } - request := openaigo.EditCreateRequestBody{ Model: "list2", Instruction: "foo", diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index 99fbcde9..ffb63111 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -1,88 +1,43 @@ package fiberContext import ( - "context" - "encoding/json" "fmt" "strings" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -type FiberContextExtractor struct { - ml *model.ModelLoader - appConfig *config.ApplicationConfig -} - -func NewFiberContextExtractor(ml *model.ModelLoader, appConfig *config.ApplicationConfig) *FiberContextExtractor { - return &FiberContextExtractor{ - ml: ml, - appConfig: appConfig, - } -} - // ModelFromContext returns the model from the context // If no model is specified, it will take the first available // Takes a model string as input which should be the one received from the user request. // It returns the model name resolved from the context and an error if any. 
-func (fce *FiberContextExtractor) ModelFromContext(ctx *fiber.Ctx, modelInput string, firstModel bool) (string, error) { - ctxPM := ctx.Params("model") - if ctxPM != "" { - log.Debug().Msgf("[FCE] Overriding param modelInput %q with ctx.Params value %q", modelInput, ctxPM) - modelInput = ctxPM +func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { + if ctx.Params("model") != "" { + modelInput = ctx.Params("model") } // Set model from bearer token, if available - bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && fce.ml.ExistsInModelPath(bearer) + // Keep TrimPrefix here: strings.TrimLeft would treat "Bearer " as a set of characters to strip, mangling model names that begin with any of them. + bearer := strings.TrimPrefix(ctx.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := fce.ml.ListModels() + models, _ := loader.ListModels() if len(models) > 0 { modelInput = models[0] - log.Debug().Msgf("[FCE] No model specified, using first available: %s", modelInput) + log.Debug().Msgf("No model specified, using: %s", modelInput) } else { - log.Warn().Msgf("[FCE] No model specified, none available") - return "", fmt.Errorf("[fce] no model specified, none available") + log.Debug().Msgf("No model specified, returning error") + return "", fmt.Errorf("no model specified") } } // A model found in the bearer token takes precedence if bearerExists { - log.Debug().Msgf("[FCE] Using model from bearer token: %s", bearer) + log.Debug().Msgf("Using model from bearer token: %s", bearer) modelInput = bearer } - - if modelInput == "" { - log.Warn().Msg("[FCE] modelInput is empty") - } return modelInput, nil } - -// TODO: Do we still need the first return value? 
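A minimal usage sketch of the now package-level helper (the handler below is hypothetical, not part of this patch). It shows the resolution order implemented above: the :model route parameter overrides the value passed in, a bearer token naming a file in the model path overrides both, and with firstModel=true an empty input falls back to the first model the loader lists.

// Hypothetical caller; assumes the fiberContext, model and fiber imports
// already used elsewhere in this patch.
func exampleHandler(ml *model.ModelLoader) fiber.Handler {
	return func(c *fiber.Ctx) error {
		name, err := fiberContext.ModelFromContext(c, ml, "", true)
		if err != nil {
			return err // "no model specified": nothing resolvable and no models on disk
		}
		return c.SendString(name)
	}
}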
-func (fce *FiberContextExtractor) OpenAIRequestFromContext(c *fiber.Ctx, firstModel bool) (string, *schema.OpenAIRequest, error) { - input := new(schema.OpenAIRequest) - - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return "", nil, fmt.Errorf("failed parsing request body: %w", err) - } - - received, _ := json.Marshal(input) - - ctx, cancel := context.WithCancel(fce.appConfig.Context) - input.Context = ctx - input.Cancel = cancel - - log.Debug().Msgf("Request received: %s", string(received)) - - var err error - input.Model, err = fce.ModelFromContext(c, input.Model, firstModel) - - return input.Model, input, err -} diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 4f5db463..841f9b5f 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -2,7 +2,9 @@ package elevenlabs import ( "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -15,7 +17,7 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/text-to-speech/{voice-id} [post] -func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.ElevenLabsTTSRequest) @@ -26,21 +28,34 @@ func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToS return err } - var err error - input.ModelID, err = fce.ModelFromContext(c, input.ModelID, false) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) if err != nil { + modelFile = input.ModelID log.Warn().Msgf("Model not found in context: %s", input.ModelID) } - responseChannel := ttsbs.TextToAudioFile(&schema.TTSRequest{ - Model: input.ModelID, - Voice: voiceID, - Input: input.Text, - }) - rawValue := <-responseChannel - if rawValue.Error != nil { - return rawValue.Error + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + if err != nil { + modelFile = input.ModelID + log.Warn().Msgf("Model not found in context: %s", input.ModelID) + } else { + if input.ModelID != "" { + modelFile = input.ModelID + } else { + modelFile = cfg.Model + } } - return c.Download(*rawValue.Value) + log.Debug().Msgf("Request for model: %s", modelFile) + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index dac20388..8c7a664a 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { return 
func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct } } -func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index df7841fb..7822e024 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -2,7 +2,9 @@ package localai import ( "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" @@ -14,26 +16,45 @@ import ( // @Param request body schema.TTSRequest true "query params" // @Success 200 {string} binary "Response" // @Router /v1/audio/speech [post] -func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error { +func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - var err error + input := new(schema.TTSRequest) // Get input data from the request body - if err = c.BodyParser(input); err != nil { + if err := c.BodyParser(input); err != nil { return err } - input.Model, err = fce.ModelFromContext(c, input.Model, false) + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) if err != nil { + modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } - responseChannel := ttsbs.TextToAudioFile(input) - rawValue := <-responseChannel - if rawValue.Error != nil { - return rawValue.Error + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model } - return c.Download(*rawValue.Value) + log.Debug().Msgf("Request for model: %s", modelFile) + + if input.Backend != "" { + cfg.Backend = input.Backend + } + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index 72cb8b4a..dceb3789 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID)) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index a240b024..36d1142b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -5,11 +5,17 @@ import ( "bufio" "bytes" "encoding/json" "fmt" + "strings" 
+ "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -19,82 +25,412 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { + emptyMessage := "" + id := uuid.New().String() + created := int(time.Now().Unix()) + + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: usage.Prompt, + CompletionTokens: usage.Completion, + TotalTokens: usage.Prompt + usage.Completion, + }, + } + + responses <- resp + return true + }) + close(responses) + } + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + result := "" + _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + result += s + // TODO: Change generated BNF grammar to be compliant with the schema so we can + // stream the result token by token here. + return true + }) + + results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) + noActionToRun := len(results) > 0 && results[0].name == noAction + + switch { + case noActionToRun: + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) + if err != nil { + log.Error().Err(err).Msg("error handling question") + return + } + + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, + Object: "chat.completion.chunk", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + + responses <- resp + + default: + for i, ss := range results { + name, args := ss.name, ss.arguments + + initialMessage := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + + responses <- schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []schema.Choice{{ + Delta: &schema.Message{ + Role: "assistant", + ToolCalls: []schema.ToolCall{ + { + Index: i, + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Arguments: args, + }, + }, + }, + }}}, + Object: "chat.completion.chunk", + } + } + } + + close(responses) + } + return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + processFunctions := false + funcs := grammar.Functions{} + modelFile, input, err := readRequest(c, ml, startupOptions, true) if err != nil { - return fmt.Errorf("failed reading parameters from request: %w", err) + return fmt.Errorf("failed reading parameters from request:%w", err) } - traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) + } + log.Debug().Msgf("Configuration read: %+v", config) + + // Allow the user to set custom actions via config file + // to be "embedded" in each model + noActionName := "answer" + noActionDescription := "use this action to answer without performing any action" + + if config.FunctionsConfig.NoActionFunctionName != "" { + noActionName = config.FunctionsConfig.NoActionFunctionName + } + if config.FunctionsConfig.NoActionDescriptionName != "" { + noActionDescription = config.FunctionsConfig.NoActionDescriptionName } - if request.Stream { + if input.ResponseFormat.Type == "json_object" { + input.Grammar = grammar.JSONBNF + } - log.Debug().Msgf("Chat Stream request received") + config.Grammar = input.Grammar + // process functions if we have any defined or if we have a function call string + if len(input.Functions) > 0 && config.ShouldUseFunctions() { + log.Debug().Msgf("Response needs to process functions") + + processFunctions = true + + noActionGrammar := grammar.Function{ + Name: noActionName, + Description: noActionDescription, + Parameters: map[string]interface{}{ + "properties": map[string]interface{}{ + "message": map[string]interface{}{ + "type": "string", + "description": "The message to reply the user with", + }}, + }, + } + + // Append the no action function + funcs = append(funcs, input.Functions...) 
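// Illustrative sketch (not emitted verbatim by this patch): with the defaults
// above, the noActionGrammar entry appended to funcs below is roughly
// equivalent to the following tool schema, whose layout follows the
// Parameters map built above:
//
//	{"name": "answer",
//	 "description": "use this action to answer without performing any action",
//	 "parameters": {"properties": {"message": {
//	   "type": "string",
//	   "description": "The message to reply the user with"}}}}
//
// A model that decides no tool is needed can then still emit a well-formed
// function call whose "message" argument carries the plain-text reply.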
+ if !config.FunctionsConfig.DisableNoAction { + funcs = append(funcs, noActionGrammar) + } + + // Force picking one of the functions by the request + if config.FunctionToCall() != "" { + funcs = funcs.Select(config.FunctionToCall()) + } + + // Update input grammar + jsStruct := funcs.ToJSONStructure() + config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) + } else if input.JSONFunctionGrammarObject != nil { + config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) + } + + // functions are not supported in stream mode (yet?) + toStream := input.Stream + + log.Debug().Msgf("Parameters: %+v", config) + + var predInput string + + // If we are using the tokenizer template, we don't need to process the messages + // unless we are processing functions + if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { + + suppressConfigSystemPrompt := false + mess := []string{} + for messageIndex, i := range input.Messages { + var content string + role := i.Role + + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := config.Roles[roleFn] + if r != "" { + role = roleFn + } + } + r := config.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" + + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if config.TemplateConfig.ChatMessage != "" { + chatMessageData := model.ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(input.Messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") + } else { + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage + } + } + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
+ if content == "" { + if r != "" { + if contentExists { + content = fmt.Sprint(r, i.StringContent) + } + + if i.FunctionCall != nil { + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) + } + } else { + if contentExists { + content = fmt.Sprint(i.StringContent) + } + if i.FunctionCall != nil { + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) + } + } + // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately + if contentExists && role == "system" { + suppressConfigSystemPrompt = true + } + } + + mess = append(mess, content) + } + + predInput = strings.Join(mess, "\n") + log.Debug().Msgf("Prompt (before templating): %s", predInput) + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Chat != "" && !processFunctions { + templateFile = config.TemplateConfig.Chat + } + + if config.TemplateConfig.Functions != "" && processFunctions { + templateFile = config.TemplateConfig.Functions + } + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + SuppressSystemPrompt: suppressConfigSystemPrompt, + Input: predInput, + Functions: funcs, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } else { + log.Debug().Msgf("Template failed loading: %s", err.Error()) + } + } + + log.Debug().Msgf("Prompt (after templating): %s", predInput) + if processFunctions { + log.Debug().Msgf("Grammar: %+v", config.Grammar) + } + } + + switch { + case toStream: + + log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) - // + // c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + responses := make(chan schema.OpenAIResponse) + + if !processFunctions { + go process(predInput, input, config, ml, responses) + } else { + go processTools(noActionName, predInput, input, config, ml, responses) + } + c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { usage := &schema.OpenAIUsage{} toolsCalled := false - for ev := range tokenChannel { - if ev.Error != nil { - log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error") - request.Cancel() - break - } - usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it - - if len(ev.Value.Choices[0].Delta.ToolCalls) > 0 { + for ev := range responses { + usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it + if len(ev.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } var buf bytes.Buffer enc := json.NewEncoder(&buf) - if ev.Error != nil { - log.Debug().Err(ev.Error).Msg("[ChatEndpoint] error to debug during tokenChannel handler") - enc.Encode(ev.Error) - } else { - enc.Encode(ev.Value) - } - log.Debug().Msgf("chat streaming sending chunk: %s", buf.String()) + enc.Encode(ev) + log.Debug().Msgf("Sending chunk: %s", buf.String()) _, err := fmt.Fprintf(w, "data: %v\n", buf.String()) if err != nil { - log.Debug().Err(err).Msgf("Sending chunk failed") - 
request.Cancel() - break - } - err = w.Flush() - if err != nil { - log.Debug().Msg("error while flushing, closing connection") - request.Cancel() + log.Debug().Msgf("Sending chunk failed: %v", err) + input.Cancel() break } + w.Flush() } finishReason := "stop" if toolsCalled { finishReason = "tool_calls" - } else if toolsCalled && len(request.Tools) == 0 { + } else if toolsCalled && len(input.Tools) == 0 { finishReason = "function_call" } resp := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{ { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: ""}, + Delta: &schema.Message{Content: &emptyMessage}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -105,21 +441,202 @@ func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAI w.WriteString("data: [DONE]\n\n") w.Flush() })) - return nil + + // no streaming mode + default: + result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { + if !processFunctions { + // no function is called, just reply and use stop as finish reason + *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) + return + } + + results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) + noActionsToRun := len(results) > 0 && results[0].name == noActionName + + switch { + case noActionsToRun: + result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput) + if err != nil { + log.Error().Err(err).Msg("error handling question") + return + } + *c = append(*c, schema.Choice{ + Message: &schema.Message{Role: "assistant", Content: &result}}) + default: + toolChoice := schema.Choice{ + Message: &schema.Message{ + Role: "assistant", + }, + } + + if len(input.Tools) > 0 { + toolChoice.FinishReason = "tool_calls" + } + + for _, ss := range results { + name, args := ss.name, ss.arguments + if len(input.Tools) > 0 { + // If we are using tools, we condense the function calls into + // a single response choice with all the tools + toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, + schema.ToolCall{ + ID: id, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: name, + Arguments: args, + }, + }, + ) + } else { + // otherwise we return more choices directly + *c = append(*c, schema.Choice{ + FinishReason: "function_call", + Message: &schema.Message{ + Role: "assistant", + FunctionCall: map[string]interface{}{ + "name": name, + "arguments": args, + }, + }, + }) + } + } + + if len(input.Tools) > 0 { + // we need to append our result if we are using tools + *c = append(*c, toolChoice) + } + } + + }, nil) + if err != nil { + return err + } + + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: result, + Object: "chat.completion", + Usage: schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + }, + } + respData, _ := json.Marshal(resp) + log.Debug().Msgf("Response: %s", respData) + + // Return the prediction in the response body + return c.JSON(resp) } - // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? - rawResponse := <-finalResultChannel - - if rawResponse.Error != nil { - return rawResponse.Error - } - - jsonResult, _ := json.Marshal(rawResponse.Value) - log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response") - - // Return the prediction in the response body - return c.JSON(rawResponse.Value) } } + +func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { + log.Debug().Msgf("nothing to do, computing a reply") + + // If there is a message that the LLM already sends as part of the JSON reply, use it + arguments := map[string]interface{}{} + json.Unmarshal([]byte(args), &arguments) + m, exists := arguments["message"] + if exists { + switch message := m.(type) { + case string: + if message != "" { + log.Debug().Msgf("Reply received from LLM: %s", message) + message = backend.Finetune(*config, prompt, message) + log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) + + return message, nil + } + } + } + + log.Debug().Msgf("No action received from LLM, without a message, computing a reply") + // Otherwise ask the LLM to understand the JSON output and the context, and return a message + // Note: This costs (in terms of CPU/GPU) another computation + config.Grammar = "" + images := []string{} + for _, m := range input.Messages { + images = append(images, m.StringImages...) + } + + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) + if err != nil { + log.Error().Err(err).Msg("model inference failed") + return "", err + } + + prediction, err := predFunc() + if err != nil { + log.Error().Err(err).Msg("prediction failed") + return "", err + } + return backend.Finetune(*config, prompt, prediction.Response), nil +} + +type funcCallResults struct { + name string + arguments string +} + +func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { + results := []funcCallResults{} + + // TODO: use generics to avoid this code duplication + if multipleResults { + ss := []map[string]interface{}{} + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + func_name, ok := s["function"] + if !ok { + continue + } + args, ok := s["arguments"] + if !ok { + continue + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) + } + } else { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
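// Illustrative sketch of the two llmresult shapes parseFunctionCall accepts,
// assuming the grammar emits "function"/"arguments" keys as the comments
// below describe (the weather example mirrors the tests in this patch):
//
//	parallel calls: [{"function": "get_current_weather", "arguments": {"location": "San Francisco"}}, ...]
//	single call:    {"function": "get_current_weather", "arguments": {"location": "San Francisco"}}
//
// In both branches the arguments object is re-marshalled to a string, since
// OpenAI clients expect function_call.arguments as stringified JSON.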
+ ss := map[string]interface{}{} + // This prevents newlines from breaking JSON parsing for clients + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name, ok := ss["function"] + if !ok { + return results + } + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object + args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + if !ok { + return results + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + return results + } + results = append(results, funcCallResults{name: funcName, arguments: string(d)}) + } + + return results +} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index d8b412a9..69923475 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -4,13 +4,18 @@ import ( "bufio" "bytes" "encoding/json" + "errors" "fmt" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -20,50 +25,116 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + id := uuid.New().String() + created := int(time.Now().Unix()) + + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + resp := schema.OpenAIResponse{ + ID: id, + Created: created, + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
+ Choices: []schema.Choice{ + { + Index: 0, + Text: s, + }, + }, + Object: "text_completion", + Usage: schema.OpenAIUsage{ + PromptTokens: usage.Prompt, + CompletionTokens: usage.Completion, + TotalTokens: usage.Prompt + usage.Completion, + }, + } + log.Debug().Msgf("Sending goroutine: %s", s) + + responses <- resp + return true + }) + close(responses) + } + return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - log.Debug().Msgf("`OpenAIRequest`: %+v", request) + log.Debug().Msgf("`input`: %+v", input) - traceID, finalResultChannel, _, _, tokenChannel, err := oais.Completion(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } - if request.Stream { - log.Debug().Msgf("Completion Stream request received") + if input.ResponseFormat.Type == "json_object" { + input.Grammar = grammar.JSONBNF + } + config.Grammar = input.Grammar + + log.Debug().Msgf("Parameter Config: %+v", config) + + if input.Stream { + log.Debug().Msgf("Stream request received") c.Context().SetContentType("text/event-stream") //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) //c.Set("Content-Type", "text/event-stream") c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + } + + templateFile := "" + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model + } + + if config.TemplateConfig.Completion != "" { + templateFile = config.TemplateConfig.Completion + } + + if input.Stream { + if len(config.PromptStrings) > 1 { + return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") + } + + predInput := config.PromptStrings[0] + + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + Input: predInput, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } + } + + responses := make(chan schema.OpenAIResponse) + + go process(predInput, input, config, ml, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { - for ev := range tokenChannel { + + for ev := range responses { var buf bytes.Buffer enc := json.NewEncoder(&buf) - if ev.Error != nil { - log.Debug().Msgf("[CompletionEndpoint] error to debug during tokenChannel handler: %q", ev.Error) - enc.Encode(ev.Error) - } else { - enc.Encode(ev.Value) - } + enc.Encode(ev) - log.Debug().Msgf("completion streaming sending chunk: %s", buf.String()) + log.Debug().Msgf("Sending chunk: %s", buf.String()) fmt.Fprintf(w, "data: %v\n", buf.String()) w.Flush() } resp := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{ { Index: 0, @@ -80,15 +151,55 @@ func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services. })) return nil } - // TODO is this proper to have exclusive from Stream, or do we need to issue both responses? - rawResponse := <-finalResultChannel - if rawResponse.Error != nil { - return rawResponse.Error + + var result []schema.Choice + + totalTokenUsage := backend.TokenUsage{} + + for k, i := range config.PromptStrings { + if templateFile != "" { + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + Input: i, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) + } + } + + r, tokenUsage, err := ComputeChoices( + input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { + *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k}) + }, nil) + if err != nil { + return err + } + + totalTokenUsage.Prompt += tokenUsage.Prompt + totalTokenUsage.Completion += tokenUsage.Completion + + result = append(result, r...) } - jsonResult, _ := json.Marshal(rawResponse.Value) + + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "text_completion", + Usage: schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + }, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index a33050dd..25497095 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -3,36 +3,92 @@ package openai import ( "encoding/json" "fmt" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" - "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/rs/zerolog/log" ) -func EditEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + modelFile, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - _, finalResultChannel, _, _, _, err := oais.Edit(request, false, request.Stream) + config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } - rawResponse := <-finalResultChannel - if rawResponse.Error != nil { - return rawResponse.Error + log.Debug().Msgf("Parameter Config: %+v", config) + + templateFile := "" + + // A model can 
have a "file.bin.tmpl" file associated with a prompt template prefix + if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { + templateFile = config.Model } - jsonResult, _ := json.Marshal(rawResponse.Value) + if config.TemplateConfig.Edit != "" { + templateFile = config.TemplateConfig.Edit + } + + var result []schema.Choice + totalTokenUsage := backend.TokenUsage{} + + for _, i := range config.InputStrings { + if templateFile != "" { + templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ + Input: i, + Instruction: input.Instruction, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) + } + } + + r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { + *c = append(*c, schema.Choice{Text: s}) + }, nil) + if err != nil { + return err + } + + totalTokenUsage.Prompt += tokenUsage.Prompt + totalTokenUsage.Completion += tokenUsage.Completion + + result = append(result, r...) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: result, + Object: "edit", + Usage: schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + }, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index be546991..eca34f79 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -3,9 +3,14 @@ package openai import ( "encoding/json" "fmt" + "time" "github.com/go-skynet/LocalAI/core/backend" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/model" + + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -16,25 +21,63 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/embeddings [post] -func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error { +func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, input, err := fce.OpenAIRequestFromContext(c, true) + model, input, err := readRequest(c, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - responseChannel := ebs.Embeddings(input) - - rawResponse := <-responseChannel - - if rawResponse.Error != nil { - return rawResponse.Error + config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + if err != nil { + return fmt.Errorf("failed reading parameters from request:%w", err) } - jsonResult, _ := json.Marshal(rawResponse.Value) + log.Debug().Msgf("Parameter Config: %+v", config) + items := 
[]schema.Item{} + + for i, s := range config.InputToken { + // get the model function to call for the result + embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig) + if err != nil { + return err + } + + embeddings, err := embedFn() + if err != nil { + return err + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + for i, s := range config.InputStrings { + // get the model function to call for the result + embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig) + if err != nil { + return err + } + + embeddings, err := embedFn() + if err != nil { + return err + } + items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"}) + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Data: items, + Object: "list", + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index ec3d84da..9e806b3e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -1,18 +1,50 @@ package openai import ( + "bufio" + "encoding/base64" "encoding/json" "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "time" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/google/uuid" "github.com/go-skynet/LocalAI/core/backend" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" ) -// https://platform.openai.com/docs/api-reference/images/create +func downloadFile(url string) (string, error) { + // Get the data + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // Create the file + out, err := os.CreateTemp("", "image") + if err != nil { + return "", err + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + return out.Name(), err +} + +// /* * @@ -27,36 +59,186 @@ import ( * */ - // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create // @Summary Creates an image given a prompt. // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/images/generations [post] -func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error { +func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - // TODO: Somewhat a hack. Is there a better place to assign this? 
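// Sketch of the call pattern the downloadFile helper above expects
// (hypothetical URL; it mirrors how ImageEndpoint uses the helper below).
// downloadFile returns the name of a temp file it created, so the caller
// owns cleanup:
//
//	tmp, err := downloadFile("https://example.com/input.png")
//	if err != nil {
//		return fmt.Errorf("failed downloading file:%w", err)
//	}
//	defer os.RemoveAll(tmp)
//	fileData, err := os.ReadFile(tmp)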
- if igbs.BaseUrlForGeneratedImages == "" { - igbs.BaseUrlForGeneratedImages = c.BaseURL() + "/generated-images/" - } - _, request, err := fce.OpenAIRequestFromContext(c, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - responseChannel := igbs.GenerateImage(request) - rawResponse := <-responseChannel - - if rawResponse.Error != nil { - return rawResponse.Error + if m == "" { + m = model.StableDiffusionBackend } + log.Debug().Msgf("Loading model: %+v", m) - jsonResult, err := json.Marshal(rawResponse.Value) + config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false) if err != nil { - return err + return fmt.Errorf("failed reading parameters from request:%w", err) } + + src := "" + if input.File != "" { + + fileData := []byte{} + // check if input.File is an URL, if so download it and save it + // to a temporary file + if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") { + out, err := downloadFile(input.File) + if err != nil { + return fmt.Errorf("failed downloading file:%w", err) + } + defer os.RemoveAll(out) + + fileData, err = os.ReadFile(out) + if err != nil { + return fmt.Errorf("failed reading file:%w", err) + } + + } else { + // base 64 decode the file and write it somewhere + // that we will cleanup + fileData, err = base64.StdEncoding.DecodeString(input.File) + if err != nil { + return err + } + } + + // Create a temporary file + outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64") + if err != nil { + return err + } + // write the base64 result + writer := bufio.NewWriter(outputFile) + _, err = writer.Write(fileData) + if err != nil { + outputFile.Close() + return err + } + outputFile.Close() + src = outputFile.Name() + defer os.RemoveAll(src) + } + + log.Debug().Msgf("Parameter Config: %+v", config) + + switch config.Backend { + case "stablediffusion": + config.Backend = model.StableDiffusionBackend + case "tinydream": + config.Backend = model.TinyDreamBackend + case "": + config.Backend = model.StableDiffusionBackend + } + + sizeParts := strings.Split(input.Size, "x") + if len(sizeParts) != 2 { + return fmt.Errorf("invalid value for 'size'") + } + width, err := strconv.Atoi(sizeParts[0]) + if err != nil { + return fmt.Errorf("invalid value for 'size'") + } + height, err := strconv.Atoi(sizeParts[1]) + if err != nil { + return fmt.Errorf("invalid value for 'size'") + } + + b64JSON := false + if input.ResponseFormat.Type == "b64_json" { + b64JSON = true + } + // src and clip_skip + var result []schema.Item + for _, i := range config.PromptStrings { + n := input.N + if input.N == 0 { + n = 1 + } + for j := 0; j < n; j++ { + prompts := strings.Split(i, "|") + positive_prompt := prompts[0] + negative_prompt := "" + if len(prompts) > 1 { + negative_prompt = prompts[1] + } + + mode := 0 + step := config.Step + if step == 0 { + step = 15 + } + + if input.Mode != 0 { + mode = input.Mode + } + + if input.Step != 0 { + step = input.Step + } + + tempDir := "" + if !b64JSON { + tempDir = appConfig.ImageDir + } + // Create a temporary file + outputFile, err := os.CreateTemp(tempDir, "b64") + if err != nil { + return err + } + outputFile.Close() + output := outputFile.Name() + ".png" + // Rename the temporary file + err = os.Rename(outputFile.Name(), output) + if err != nil { + return err + } + + baseURL := c.BaseURL() + + fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, 
negative_prompt, src, output, ml, *config, appConfig) + if err != nil { + return err + } + if err := fn(); err != nil { + return err + } + + item := &schema.Item{} + + if b64JSON { + defer os.RemoveAll(output) + data, err := os.ReadFile(output) + if err != nil { + return err + } + item.B64JSON = base64.StdEncoding.EncodeToString(data) + } else { + base := filepath.Base(output) + item.URL = baseURL + "/generated-images/" + base + } + + result = append(result, *item) + } + } + + id := uuid.New().String() + created := int(time.Now().Unix()) + resp := &schema.OpenAIResponse{ + ID: id, + Created: created, + Data: result, + } + + jsonResult, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", jsonResult) + // Return the prediction in the response body - return c.JSON(rawResponse.Value) + return c.JSON(resp) } } diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go new file mode 100644 index 00000000..06e784b7 --- /dev/null +++ b/core/http/endpoints/openai/inference.go @@ -0,0 +1,55 @@ +package openai + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + + "github.com/go-skynet/LocalAI/core/schema" + model "github.com/go-skynet/LocalAI/pkg/model" +) + +func ComputeChoices( + req *schema.OpenAIRequest, + predInput string, + config *config.BackendConfig, + o *config.ApplicationConfig, + loader *model.ModelLoader, + cb func(string, *[]schema.Choice), + tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) { + n := req.N // number of completions to return + result := []schema.Choice{} + + if n == 0 { + n = 1 + } + + images := []string{} + for _, m := range req.Messages { + images = append(images, m.StringImages...) + } + + // get the model function to call for the result + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) + if err != nil { + return result, backend.TokenUsage{}, err + } + + tokenUsage := backend.TokenUsage{} + + for i := 0; i < n; i++ { + prediction, err := predFunc() + if err != nil { + return result, backend.TokenUsage{}, err + } + + tokenUsage.Prompt += prediction.Usage.Prompt + tokenUsage.Completion += prediction.Usage.Completion + + finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) + cb(finetunedResponse, &result) + + //result = append(result, Choice{Text: prediction}) + + } + return result, tokenUsage, err +} diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 9bb2b2ca..04e611a2 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,21 +1,61 @@ package openai import ( + "regexp" + + "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/core/services" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - // If blank, no filter is applied. 
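The rewritten handler that follows folds the old ListModelsService behaviour inline: an empty filter query parameter matches everything, while a non-empty one is compiled to a regular expression that gates both configured model names and loose model files. A reduced sketch of that filtering rule, with illustrative model names:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// With an empty filter the endpoint matches everything; otherwise the
	// query string is compiled to a regexp, exactly as in the handler.
	filter := "llama.*"

	filterFn := func(string) bool { return true }
	if filter != "" {
		rxp, err := regexp.Compile(filter)
		if err != nil {
			panic(err) // the handler returns the error to the client instead
		}
		filterFn = rxp.MatchString
	}

	// Illustrative mix of configured names and loose files on disk.
	names := []string{"llama-3-8b", "whisper-base", "llama-2-13b.gguf"}
	for _, n := range names {
		if filterFn(n) {
			fmt.Println("listed:", n)
		}
	}
}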
+ models, err := ml.ListModels() + if err != nil { + return err + } + var mm map[string]interface{} = map[string]interface{}{} + + dataModels := []schema.OpenAIModel{} + + var filterFn func(name string) bool filter := c.Query("filter") + + // If filter is not specified, do not filter the list by model name + if filter == "" { + filterFn = func(_ string) bool { return true } + } else { + // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn + rxp, err := regexp.Compile(filter) + if err != nil { + return err + } + filterFn = func(name string) bool { + return rxp.MatchString(name) + } + } + // By default, exclude any loose files that are already referenced by a configuration file. excludeConfigured := c.QueryBool("excludeConfigured", true) - dataModels, err := lms.ListModels(filter, excludeConfigured) - if err != nil { - return err + // Start with the known configurations + for _, c := range cl.GetAllBackendConfigs() { + if excludeConfigured { + mm[c.Model] = nil + } + + if filterFn(c.Name) { + dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + } + } + + // Then iterate through the loose files: + for _, m := range models { + // And only add them if they shouldn't be skipped. + if _, exists := mm[m]; !exists && filterFn(m) { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } } return c.JSON(struct { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go new file mode 100644 index 00000000..369fb0b8 --- /dev/null +++ b/core/http/endpoints/openai/request.go @@ -0,0 +1,285 @@ +package openai + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + + "github.com/go-skynet/LocalAI/core/config" + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grammar" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { + input := new(schema.OpenAIRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return "", nil, fmt.Errorf("failed parsing request body: %w", err) + } + + received, _ := json.Marshal(input) + + ctx, cancel := context.WithCancel(o.Context) + input.Context = ctx + input.Cancel = cancel + + log.Debug().Msgf("Request received: %s", string(received)) + + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) + + return modelFile, input, err } + +// this function checks whether the string is a URL; if it is, it downloads the image into memory, +// encodes it in base64 and returns the base64 string +func getBase64Image(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := http.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not a valid string") +} + +func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { + if input.Echo { + config.Echo = input.Echo + } + if input.TopK != nil { + config.TopK = input.TopK + } + if input.TopP != nil { + config.TopP = input.TopP + } + + if input.Backend != "" { + config.Backend = input.Backend + } + + if input.ClipSkip != 0 { + config.Diffusers.ClipSkip = input.ClipSkip + } + + if input.ModelBaseName != "" { + config.AutoGPTQ.ModelBaseName = input.ModelBaseName + } + + if input.NegativePromptScale != 0 { + config.NegativePromptScale = input.NegativePromptScale + } + + if input.UseFastTokenizer { + config.UseFastTokenizer = input.UseFastTokenizer + } + + if input.NegativePrompt != "" { + config.NegativePrompt = input.NegativePrompt + } + + if input.RopeFreqBase != 0 { + config.RopeFreqBase = input.RopeFreqBase + } + + if input.RopeFreqScale != 0 { + config.RopeFreqScale = input.RopeFreqScale + } + + if input.Grammar != "" { + config.Grammar = input.Grammar + } + + if input.Temperature != nil { + config.Temperature = input.Temperature + } + + if input.Maxtokens != nil { + config.Maxtokens = input.Maxtokens + } + + switch stop := input.Stop.(type) { + case string: + if stop != "" { + config.StopWords = append(config.StopWords, stop) + } + case []interface{}: + for _, pp := range stop { + if s, ok := pp.(string); ok { + config.StopWords = append(config.StopWords, s) + } + } + } + + if len(input.Tools) > 0 { + for _, tool := range input.Tools { + input.Functions = append(input.Functions, tool.Function) + } + } + + if input.ToolsChoice != nil { + var toolChoice grammar.Tool + + switch content := input.ToolsChoice.(type) { + case string: + _ = json.Unmarshal([]byte(content), &toolChoice) + case map[string]interface{}: + dat, _ := json.Marshal(content) + _ = json.Unmarshal(dat, &toolChoice) + } + input.FunctionCall = map[string]interface{}{ + "name": toolChoice.Function.Name, + } + } + + // Decode each request's message content + index := 0 + for i, m := range input.Messages { + switch content := m.Content.(type) { + case string: + input.Messages[i].StringContent = content + case []interface{}: + dat, _ := json.Marshal(content) + c := []schema.Content{} + json.Unmarshal(dat, &c) + for _, pp := range c { + if pp.Type == "text" { + input.Messages[i].StringContent = pp.Text + } else if pp.Type == "image_url" { + // Detect if pp.ImageURL is a URL; if it is, download the image and encode it in base64: + base64, err := getBase64Image(pp.ImageURL.URL) + if err == nil { + input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff + // set a placeholder for each image + input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent + index++ + } else { + fmt.Print("Failed encoding image", err) + } + } + } + } + } + + if input.RepeatPenalty != 0 { + config.RepeatPenalty = input.RepeatPenalty + } + + if input.FrequencyPenalty != 0 { + config.FrequencyPenalty = input.FrequencyPenalty + } + + if input.PresencePenalty != 0 { + config.PresencePenalty = input.PresencePenalty + } + + if input.Keep != 0 { + config.Keep = input.Keep + } + + if input.Batch != 0 { + config.Batch = input.Batch + } + + if input.IgnoreEOS { + config.IgnoreEOS = input.IgnoreEOS + } + + if input.Seed != nil { + config.Seed = input.Seed + } + + if input.TypicalP != nil { + config.TypicalP = input.TypicalP + } + + switch inputs := input.Input.(type) { + case string: + if inputs != 
"" { + config.InputStrings = append(config.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + switch i := pp.(type) { + case string: + config.InputStrings = append(config.InputStrings, i) + case []interface{}: + tokens := []int{} + for _, ii := range i { + tokens = append(tokens, int(ii.(float64))) + } + config.InputToken = append(config.InputToken, tokens) + } + } + } + + // Can be either a string or an object + switch fnc := input.FunctionCall.(type) { + case string: + if fnc != "" { + config.SetFunctionCallString(fnc) + } + case map[string]interface{}: + var name string + n, exists := fnc["name"] + if exists { + nn, e := n.(string) + if e { + name = nn + } + } + config.SetFunctionCallNameString(name) + } + + switch p := input.Prompt.(type) { + case string: + config.PromptStrings = append(config.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + config.PromptStrings = append(config.PromptStrings, s) + } + } + } +} + +func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { + cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath, + config.LoadOptionDebug(debug), + config.LoadOptionThreads(threads), + config.LoadOptionContextSize(ctx), + config.LoadOptionF16(f16), + ) + + // Set the parameters for the language model prediction + updateRequestConfig(cfg, input) + + return cfg, input, err +} diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index 572cec12..c7dd39e7 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -9,7 +9,8 @@ import ( "path/filepath" "github.com/go-skynet/LocalAI/core/backend" - fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/config" + model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -22,15 +23,17 @@ import ( // @Param file formData file true "file" // @Success 200 {object} map[string]string "Response" // @Router /v1/audio/transcriptions [post] -func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error { +func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - _, request, err := fce.OpenAIRequestFromContext(c, false) + m, input, err := readRequest(c, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - // TODO: Investigate this file copy stuff later - potentially belongs in service. 
- + config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16) + if err != nil { + return fmt.Errorf("failed reading parameters from request:%w", err) + } // retrieve the file data from the request file, err := c.FormFile("file") if err != nil { @@ -62,16 +65,13 @@ func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.Tr log.Debug().Msgf("Audio file copied to: %+v", dst) - request.File = dst - - responseChannel := tbs.Transcribe(request) - rawResponse := <-responseChannel - - if rawResponse.Error != nil { - return rawResponse.Error + tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig) + if err != nil { + return err } - log.Debug().Msgf("Transcribed: %+v", rawResponse.Value) + + log.Debug().Msgf("Transcribed: %+v", tr) // TODO: handle different outputs here - return c.Status(http.StatusOK).JSON(rawResponse.Value) + return c.Status(http.StatusOK).JSON(tr) } } diff --git a/core/schema/transcription.go b/core/schema/whisper.go similarity index 90% rename from core/schema/transcription.go rename to core/schema/whisper.go index fe1799fa..41413c1f 100644 --- a/core/schema/transcription.go +++ b/core/schema/whisper.go @@ -10,7 +10,7 @@ type Segment struct { Tokens []int `json:"tokens"` } -type TranscriptionResult struct { +type Result struct { Segments []Segment `json:"segments"` Text string `json:"text"` } diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go index a610432c..979a67a3 100644 --- a/core/services/backend_monitor.go +++ b/core/services/backend_monitor.go @@ -15,22 +15,22 @@ import ( gopsutil "github.com/shirou/gopsutil/v3/process" ) -type BackendMonitorService struct { +type BackendMonitor struct { configLoader *config.BackendConfigLoader modelLoader *model.ModelLoader options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. 
} -func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService { - return &BackendMonitorService{ +func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor { + return BackendMonitor{ configLoader: configLoader, modelLoader: modelLoader, options: appConfig, } } -func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) { - config, exists := bms.configLoader.GetBackendConfig(modelName) +func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) { + config, exists := bm.configLoader.GetBackendConfig(modelName) var backendId string if exists { backendId = config.Model @@ -46,8 +46,8 @@ func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) return backendId, nil } -func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { - config, exists := bms.configLoader.GetBackendConfig(model) +func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { + config, exists := bm.configLoader.GetBackendConfig(model) var backend string if exists { backend = config.Model @@ -60,7 +60,7 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche backend = fmt.Sprintf("%s.bin", backend) } - pid, err := bms.modelLoader.GetGRPCPID(backend) + pid, err := bm.modelLoader.GetGRPCPID(backend) if err != nil { log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") @@ -101,12 +101,12 @@ func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*sche }, nil } -func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) { - backendId, err := bms.getModelLoaderIDFromModelName(modelName) +func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) { + backendId, err := bm.getModelLoaderIDFromModelName(modelName) if err != nil { return nil, err } - modelAddr := bms.modelLoader.CheckIsLoaded(backendId) + modelAddr := bm.modelLoader.CheckIsLoaded(backendId) if modelAddr == "" { return nil, fmt.Errorf("backend %s is not currently loaded", backendId) } @@ -114,7 +114,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO()) if rpcErr != nil { log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bms.SampleLocalBackendProcess(backendId) + val, slbErr := bm.SampleLocalBackendProcess(backendId) if slbErr != nil { return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) } @@ -131,10 +131,10 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status return status, nil } -func (bms BackendMonitorService) ShutdownModel(modelName string) error { - backendId, err := bms.getModelLoaderIDFromModelName(modelName) +func (bm BackendMonitor) ShutdownModel(modelName string) error { + backendId, err := bm.getModelLoaderIDFromModelName(modelName) if err != nil { return err } - return bms.modelLoader.ShutdownModel(backendId) + return bm.modelLoader.ShutdownModel(backendId) } diff --git a/core/services/gallery.go 
b/core/services/gallery.go index 1ef8e3e2..b068abbb 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -3,18 +3,14 @@ package services import ( "context" "encoding/json" - "errors" "os" - "path/filepath" "strings" "sync" "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/embedded" - "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/startup" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" ) @@ -33,6 +29,18 @@ func NewGalleryService(modelPath string) *GalleryService { } } +func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { + + config, err := gallery.GetGalleryConfigFromURL(req.URL) + if err != nil { + return err + } + + config.Files = append(config.Files, req.AdditionalFiles...) + + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) +} + func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { g.Lock() defer g.Unlock() @@ -84,10 +92,10 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) } } else if op.ConfigURL != "" { - PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) err = cl.Preload(g.modelPath) } else { - err = prepareModel(g.modelPath, op.Req, progressCallback) + err = prepareModel(g.modelPath, op.Req, cl, progressCallback) } if err != nil { @@ -119,12 +127,13 @@ type galleryModel struct { ID string `json:"id"` } -func processRequests(modelPath string, galleries []gallery.Gallery, requests []galleryModel) error { +func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error { var err error for _, r := range requests { utils.ResetDownloadTimers() if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction) + } else { if strings.Contains(r.ID, "@") { err = gallery.InstallModelFromGallery( @@ -149,7 +158,7 @@ func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, g return err } - return processRequests(modelPath, galleries, requests) + return processRequests(modelPath, s, cl, galleries, requests) } func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error { @@ -159,90 +168,5 @@ func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, return err } - return processRequests(modelPath, galleries, requests) -} - -// PreloadModelsConfigurations will preload models from the given list of URLs -// It will download the model if it is not already present in the model path -// It will also try to resolve if the model is an embedded model YAML configuration -func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { - for _, url := range models { - - // As a best effort, try to resolve the model from the remote library - // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL) - if err == nil { - if lib[url] != "" { - 
log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } - } - } - - url = embedded.ModelShortURL(url) - switch { - case embedded.ExistsInModelsLibrary(url): - modelYAML, err := embedded.ResolveContent(url) - // If we resolve something, just save it to disk and continue - if err != nil { - log.Error().Err(err).Msg("error resolving model content") - continue - } - - log.Debug().Msgf("[startup] resolved embedded model: %s", url) - md5Name := utils.MD5(url) - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") - } - case downloader.LooksLikeURL(url): - log.Debug().Msgf("[startup] resolved model to download: %s", url) - - // md5 of model name - md5Name := utils.MD5(url) - - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - }) - if err != nil { - log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model") - } - } - default: - if _, err := os.Stat(url); err == nil { - log.Debug().Msgf("[startup] resolved local model: %s", url) - // copy to modelPath - md5Name := utils.MD5(url) - - modelYAML, err := os.ReadFile(url) - if err != nil { - log.Error().Err(err).Str("filepath", url).Msg("error reading model definition") - continue - } - - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil { - log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s") - } - } else { - log.Warn().Msgf("[startup] failed resolving model '%s'", url) - } - } - } -} - -func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error { - - config, err := gallery.GetGalleryConfigFromURL(req.URL) - if err != nil { - return err - } - - config.Files = append(config.Files, req.AdditionalFiles...) 
- - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) + return processRequests(modelPath, s, cl, galleries, requests) } diff --git a/core/services/list_models.go b/core/services/list_models.go deleted file mode 100644 index a21e6faf..00000000 --- a/core/services/list_models.go +++ /dev/null @@ -1,72 +0,0 @@ -package services - -import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/model" -) - -type ListModelsService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig -} - -func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { - return &ListModelsService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - } -} - -func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { - - models, err := lms.ml.ListModels() - if err != nil { - return nil, err - } - - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool - - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return nil, err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - - // Start with the known configurations - for _, c := range lms.bcl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } - } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - } - - return dataModels, nil -} diff --git a/core/services/openai.go b/core/services/openai.go deleted file mode 100644 index 7a2679ad..00000000 --- a/core/services/openai.go +++ /dev/null @@ -1,808 +0,0 @@ -package services - -import ( - "encoding/json" - "errors" - "fmt" - "strings" - "sync" - "time" - - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/concurrency" - "github.com/go-skynet/LocalAI/pkg/grammar" - "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/google/uuid" - "github.com/imdario/mergo" - "github.com/rs/zerolog/log" -) - -type endpointGenerationConfigurationFn func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration - -type endpointConfiguration struct { - SchemaObject string - TemplatePath string - TemplateData model.PromptTemplateData - ResultMappingFn func(resp *backend.LLMResponse, index int) schema.Choice - CompletionMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] - TokenMappingFn func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] -} - -// TODO: This is used for completion and edit. I am pretty sure I forgot parts, but fix it later. 
-func simpleMapper(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - if resp.Error != nil || resp.Value == nil { - return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} - } - return concurrency.ErrorOr[*schema.OpenAIResponse]{ - Value: &schema.OpenAIResponse{ - Choices: []schema.Choice{ - { - Text: resp.Value.Response, - }, - }, - Usage: schema.OpenAIUsage{ - PromptTokens: resp.Value.Usage.Prompt, - CompletionTokens: resp.Value.Usage.Completion, - TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, - }, - }, - } -} - -// TODO: Consider alternative names for this. -// The purpose of this struct is to hold a reference to the OpenAI request context information -// This keeps things simple within core/services/openai.go and allows consumers to "see" this information if they need it -type OpenAIRequestTraceID struct { - ID string - Created int -} - -// This type split out from core/backend/llm.go - I'm still not _totally_ sure about this, but it seems to make sense to keep the generic LLM code from the OpenAI specific higher level functionality -type OpenAIService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig - llmbs *backend.LLMBackendService -} - -func NewOpenAIService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig, llmbs *backend.LLMBackendService) *OpenAIService { - return &OpenAIService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - llmbs: llmbs, - } -} - -// Keeping in place as a reminder to POTENTIALLY ADD MORE VALIDATION HERE??? -func (oais *OpenAIService) getConfig(request *schema.OpenAIRequest) (*config.BackendConfig, *schema.OpenAIRequest, error) { - return oais.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, oais.appConfig) -} - -// TODO: It would be a lot less messy to make a return struct that had references to each of these channels -// INTENTIONALLY not doing that quite yet - I believe we need to let the references to unused channels die for the GC to automatically collect -- can we manually free()? -// finalResultsChannel is the primary async return path: one result for the entire request. -// promptResultsChannels is DUBIOUS. It's expected to be raw fan-out used within the function itself, but I am exposing for testing? One bundle of LLMResponseBundle per PromptString? Gets all N completions for a single prompt. -// completionsChannel is a channel that emits one *LLMResponse per generated completion, be that different prompts or N. Seems the most useful other than "entire request" Request is available to attempt tracing??? -// tokensChannel is a channel that emits one *LLMResponse per generated token. Let's see what happens! 
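The comment block above documents the fan-out contract of the service being deleted: one final response per request, plus optional per-completion and per-token channels, each carrying an error-or-value pair. A reduced sketch of producing and draining such a channel, with a local generic stand-in for the concurrency package's ErrorOr wrapper (all names here are illustrative, not the deleted package's definitions):

package main

import (
	"errors"
	"fmt"
)

// Stand-in for the concurrency.ErrorOr[T] wrapper used by the deleted service.
type errorOr[T any] struct {
	Value T
	Err   error
}

func main() {
	tokens := make(chan errorOr[string])

	// Producer: emits one result per generated token, then closes the channel.
	go func() {
		defer close(tokens)
		for _, t := range []string{"Hello", ",", " world"} {
			tokens <- errorOr[string]{Value: t}
		}
		tokens <- errorOr[string]{Err: errors.New("backend stopped early")} // illustrative failure
	}()

	// Consumer: check the error before the value, mirroring how the
	// endpoints drained the raw response channels.
	for r := range tokens {
		if r.Err != nil {
			fmt.Println("\nerror:", r.Err)
			continue
		}
		fmt.Print(r.Value)
	}
}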
-func (oais *OpenAIService) Completion(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { - return endpointConfiguration{ - SchemaObject: "text_completion", - TemplatePath: bc.TemplateConfig.Completion, - TemplateData: model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - }, - ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { - return schema.Choice{ - Index: promptIndex, - FinishReason: "stop", - Text: resp.Response, - } - }, - CompletionMappingFn: simpleMapper, - TokenMappingFn: simpleMapper, - } - }, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) Edit(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateTextFromRequest(request, func(bc *config.BackendConfig, request *schema.OpenAIRequest) endpointConfiguration { - - return endpointConfiguration{ - SchemaObject: "edit", - TemplatePath: bc.TemplateConfig.Edit, - TemplateData: model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - Instruction: request.Instruction, - }, - ResultMappingFn: func(resp *backend.LLMResponse, promptIndex int) schema.Choice { - return schema.Choice{ - Index: promptIndex, - FinishReason: "stop", - Text: resp.Response, - } - }, - CompletionMappingFn: simpleMapper, - TokenMappingFn: simpleMapper, - } - }, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) Chat(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - return oais.GenerateFromMultipleMessagesChatRequest(request, notifyOnPromptResult, notifyOnToken, nil) -} - -func (oais *OpenAIService) GenerateTextFromRequest(request *schema.OpenAIRequest, endpointConfigFn endpointGenerationConfigurationFn, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], promptResultsChannels []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - if initialTraceID == nil { - traceID = &OpenAIRequestTraceID{ - ID: uuid.New().String(), - Created: int(time.Now().Unix()), - } - } else { - traceID = initialTraceID - } - - bc, request, err := oais.getConfig(request) - if err != nil { - log.Error().Err(err).Msgf("[oais::GenerateTextFromRequest] error getting 
configuration") - return - } - - if request.ResponseFormat.Type == "json_object" { - request.Grammar = grammar.JSONBNF - } - - bc.Grammar = request.Grammar - - if request.Stream && len(bc.PromptStrings) > 1 { - log.Warn().Msg("potentially cannot handle more than 1 `PromptStrings` when Streaming?") - } - - rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - finalResultChannel = rawFinalResultChannel - promptResultsChannels = []<-chan concurrency.ErrorOr[*backend.LLMResponseBundle]{} - var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - if notifyOnPromptResult { - rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - if notifyOnToken { - rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - - promptResultsChannelLock := sync.Mutex{} - - endpointConfig := endpointConfigFn(bc, request) - - if len(endpointConfig.TemplatePath) == 0 { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) { - endpointConfig.TemplatePath = bc.Model - } else { - log.Warn().Msgf("failed to find any template for %+v", request) - } - } - - setupWG := sync.WaitGroup{} - var prompts []string - if lPS := len(bc.PromptStrings); lPS > 0 { - setupWG.Add(lPS) - prompts = bc.PromptStrings - } else { - setupWG.Add(len(bc.InputStrings)) - prompts = bc.InputStrings - } - - var setupError error = nil - - for pI, p := range prompts { - - go func(promptIndex int, prompt string) { - if endpointConfig.TemplatePath != "" { - promptTemplateData := model.PromptTemplateData{ - Input: prompt, - } - err := mergo.Merge(promptTemplateData, endpointConfig.TemplateData, mergo.WithOverride) - if err == nil { - templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, endpointConfig.TemplatePath, promptTemplateData) - if err == nil { - prompt = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", prompt) - } - } - } - - log.Debug().Msgf("[OAIS GenerateTextFromRequest] Prompt: %q", prompt) - promptResultsChannel, completionChannels, tokenChannels, err := oais.llmbs.GenerateText(prompt, request, bc, - func(r *backend.LLMResponse) schema.Choice { - return endpointConfig.ResultMappingFn(r, promptIndex) - }, notifyOnPromptResult, notifyOnToken) - if err != nil { - log.Error().Msgf("Unable to generate text prompt: %q\nerr: %q", prompt, err) - promptResultsChannelLock.Lock() - setupError = errors.Join(setupError, err) - promptResultsChannelLock.Unlock() - setupWG.Done() - return - } - if notifyOnPromptResult { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(completionChannels, endpointConfig.CompletionMappingFn), rawCompletionsChannel, true) - } - if notifyOnToken { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, endpointConfig.TokenMappingFn), rawTokenChannel, true) - } - promptResultsChannelLock.Lock() - promptResultsChannels = append(promptResultsChannels, promptResultsChannel) - promptResultsChannelLock.Unlock() - setupWG.Done() - }(pI, p) - - } - setupWG.Wait() - - // If any of the setup goroutines experienced an error, quit early here. 
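The early-exit path below aggregates failures from the per-prompt setup goroutines: each goroutine appends its error under a mutex via errors.Join, and a non-nil aggregate short-circuits before any reducer is wired up. A reduced sketch of that aggregation pattern (the prompt values and the failure condition are illustrative):

package main

import (
	"errors"
	"fmt"
	"sync"
)

func main() {
	var (
		mu       sync.Mutex
		setupErr error
		wg       sync.WaitGroup
	)

	prompts := []string{"a", "b", "c"} // illustrative prompt set
	for _, p := range prompts {
		wg.Add(1)
		go func(prompt string) {
			defer wg.Done()
			if prompt == "b" { // stand-in for a failed GenerateText setup
				mu.Lock()
				setupErr = errors.Join(setupErr, fmt.Errorf("prompt %q failed", prompt))
				mu.Unlock()
			}
		}(p)
	}
	wg.Wait()

	// Mirrors the early-exit branch: abort before wiring the result reducer.
	if setupErr != nil {
		fmt.Println("setup failed:", setupErr)
		return
	}
	fmt.Println("all prompts set up")
}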
- if setupError != nil { - go func() { - log.Error().Err(setupError).Msgf("[OAIS GenerateTextFromRequest] caught an error during setup") - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: setupError} - close(rawFinalResultChannel) - }() - return - } - - initialResponse := &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, - Object: endpointConfig.SchemaObject, - Usage: schema.OpenAIUsage{}, - } - - // utils.SliceOfChannelsRawMerger[[]schema.Choice](promptResultsChannels, rawFinalResultChannel, func(results []schema.Choice) (*schema.OpenAIResponse, error) { - concurrency.SliceOfChannelsReducer( - promptResultsChannels, rawFinalResultChannel, - func(iv concurrency.ErrorOr[*backend.LLMResponseBundle], result concurrency.ErrorOr[*schema.OpenAIResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - - if iv.Error != nil { - result.Error = iv.Error - return result - } - result.Value.Usage.PromptTokens += iv.Value.Usage.Prompt - result.Value.Usage.CompletionTokens += iv.Value.Usage.Completion - result.Value.Usage.TotalTokens = result.Value.Usage.PromptTokens + result.Value.Usage.CompletionTokens - - result.Value.Choices = append(result.Value.Choices, iv.Value.Response...) - - return result - }, concurrency.ErrorOr[*schema.OpenAIResponse]{Value: initialResponse}, true) - - completionsChannel = rawCompletionsChannel - tokenChannel = rawTokenChannel - - return -} - -// TODO: For porting sanity, this is distinct from GenerateTextFromRequest and is _currently_ specific to Chat purposes -// this is not a final decision -- just a reality of moving a lot of parts at once -// / This has _become_ Chat which wasn't the goal... More cleanup in the future once it's stable? -func (oais *OpenAIService) GenerateFromMultipleMessagesChatRequest(request *schema.OpenAIRequest, notifyOnPromptResult bool, notifyOnToken bool, initialTraceID *OpenAIRequestTraceID) ( - traceID *OpenAIRequestTraceID, finalResultChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], - completionsChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], tokenChannel <-chan concurrency.ErrorOr[*schema.OpenAIResponse], err error) { - - if initialTraceID == nil { - traceID = &OpenAIRequestTraceID{ - ID: uuid.New().String(), - Created: int(time.Now().Unix()), - } - } else { - traceID = initialTraceID - } - - bc, request, err := oais.getConfig(request) - if err != nil { - return - } - - // Allow the user to set custom actions via config file - // to be "embedded" in each model - noActionName := "answer" - noActionDescription := "use this action to answer without performing any action" - - if bc.FunctionsConfig.NoActionFunctionName != "" { - noActionName = bc.FunctionsConfig.NoActionFunctionName - } - if bc.FunctionsConfig.NoActionDescriptionName != "" { - noActionDescription = bc.FunctionsConfig.NoActionDescriptionName - } - - if request.ResponseFormat.Type == "json_object" { - request.Grammar = grammar.JSONBNF - } - - bc.Grammar = request.Grammar - - processFunctions := false - funcs := grammar.Functions{} - // process functions if we have any defined or if we have a function call string - if len(request.Functions) > 0 && bc.ShouldUseFunctions() { - log.Debug().Msgf("Response needs to process functions") - - processFunctions = true - - noActionGrammar := grammar.Function{ - Name: noActionName, - Description: noActionDescription, - Parameters: map[string]interface{}{ - "properties": map[string]interface{}{ - "message": map[string]interface{}{ - "type": "string", - 
"description": "The message to reply the user with", - }}, - }, - } - - // Append the no action function - funcs = append(funcs, request.Functions...) - if !bc.FunctionsConfig.DisableNoAction { - funcs = append(funcs, noActionGrammar) - } - - // Force picking one of the functions by the request - if bc.FunctionToCall() != "" { - funcs = funcs.Select(bc.FunctionToCall()) - } - - // Update input grammar - jsStruct := funcs.ToJSONStructure() - bc.Grammar = jsStruct.Grammar("", bc.FunctionsConfig.ParallelCalls) - } else if request.JSONFunctionGrammarObject != nil { - bc.Grammar = request.JSONFunctionGrammarObject.Grammar("", bc.FunctionsConfig.ParallelCalls) - } - - if request.Stream && processFunctions { - log.Warn().Msg("Streaming + Functions is highly experimental in this version") - } - - var predInput string - - if !bc.TemplateConfig.UseTokenizerTemplate || processFunctions { - - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range request.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := bc.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := bc.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if bc.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: bc.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(request.Messages) - 1), - Function: bc.Grammar != "" && (messageIndex == (len(request.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := oais.ml.EvaluateTemplateForChatMessage(bc.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, bc.TemplateConfig.ChatMessage, err) - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", bc.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. 
- if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - predInput = strings.Join(mess, "\n") - - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if oais.ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", bc.Model)) { - templateFile = bc.Model - } - - if bc.TemplateConfig.Chat != "" && !processFunctions { - templateFile = bc.TemplateConfig.Chat - } - - if bc.TemplateConfig.Functions != "" && processFunctions { - templateFile = bc.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := oais.ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: bc.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } - } - log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { - log.Debug().Msgf("Grammar: %+v", bc.Grammar) - } - - rawFinalResultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - var rawCompletionsChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - var rawTokenChannel chan concurrency.ErrorOr[*schema.OpenAIResponse] - if notifyOnPromptResult { - rawCompletionsChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - if notifyOnToken { - rawTokenChannel = make(chan concurrency.ErrorOr[*schema.OpenAIResponse]) - } - - rawResultChannel, individualCompletionChannels, tokenChannels, err := oais.llmbs.GenerateText(predInput, request, bc, func(resp *backend.LLMResponse) schema.Choice { - return schema.Choice{ - Index: 0, // ??? - FinishReason: "stop", - Message: &schema.Message{ - Role: "assistant", - Content: resp.Response, - }, - } - }, notifyOnPromptResult, notifyOnToken) - - chatSimpleMappingFn := func(resp concurrency.ErrorOr[*backend.LLMResponse]) concurrency.ErrorOr[*schema.OpenAIResponse] { - if resp.Error != nil || resp.Value == nil { - return concurrency.ErrorOr[*schema.OpenAIResponse]{Error: resp.Error} - } - return concurrency.ErrorOr[*schema.OpenAIResponse]{ - Value: &schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{ - { - Delta: &schema.Message{ - Role: "assistant", - Content: resp.Value.Response, - }, - Index: 0, - }, - }, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: resp.Value.Usage.Prompt, - CompletionTokens: resp.Value.Usage.Completion, - TotalTokens: resp.Value.Usage.Prompt + resp.Value.Usage.Completion, - }, - }, - } - } - - if notifyOnPromptResult { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(individualCompletionChannels, chatSimpleMappingFn), rawCompletionsChannel, true) - } - if notifyOnToken { - concurrency.SliceOfChannelsRawMergerWithoutMapping(concurrency.SliceOfChannelsTransformer(tokenChannels, chatSimpleMappingFn), rawTokenChannel, true) - } - - go func() { - rawResult := <-rawResultChannel - if rawResult.Error != nil { - log.Warn().Msgf("OpenAIService::processTools GenerateText error [DEBUG THIS?] %q", rawResult.Error) - return - } - llmResponseChoices := rawResult.Value.Response - - if processFunctions && len(llmResponseChoices) > 1 { - log.Warn().Msgf("chat functions response with %d choices in response, debug this?", len(llmResponseChoices)) - log.Debug().Msgf("%+v", llmResponseChoices) - } - - for _, result := range rawResult.Value.Response { - // If no functions, just return the raw result. - if !processFunctions { - - resp := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{result}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: rawResult.Value.Usage.Prompt, - CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, - }, - } - - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp} - - continue - } - // At this point, things are function specific! - - // Oh no this can't be the right way to do this... but it works. Save us, mudler! - fString := fmt.Sprintf("%s", result.Message.Content) - results := parseFunctionCall(fString, bc.FunctionsConfig.ParallelCalls) - noActionToRun := (len(results) > 0 && results[0].name == noActionName) - - if noActionToRun { - log.Debug().Msg("-- noActionToRun branch --") - initialMessage := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: ""}}}, - Object: "stop", - } - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage} - - result, err := oais.handleQuestion(bc, request, results[0].arguments, predInput) - if err != nil { - log.Error().Msgf("error handling question: %s", err.Error()) - return - } - - resp := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, - Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: rawResult.Value.Usage.Prompt, - CompletionTokens: rawResult.Value.Usage.Completion, - TotalTokens: rawResult.Value.Usage.Prompt + rawResult.Value.Usage.Completion, - }, - } - - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &resp} - - } else { - log.Debug().Msgf("[GenerateFromMultipleMessagesChatRequest] fnResultsBranch: %+v", results) - for i, ss := range results { - name, args := ss.name, ss.arguments - - initialMessage := schema.OpenAIResponse{ - ID: traceID.ID, - Created: traceID.Created, - Model: request.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{ - FinishReason: "function_call", - Message: &schema.Message{ - Role: "assistant", - ToolCalls: []schema.ToolCall{ - { - Index: i, - ID: traceID.ID, - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: name, - Arguments: args, - }, - }, - }, - }}}, - Object: "chat.completion.chunk", - } - rawFinalResultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: &initialMessage} - } - } - } - - close(rawFinalResultChannel) - }() - - finalResultChannel = rawFinalResultChannel - completionsChannel = rawCompletionsChannel - tokenChannel = rawTokenChannel - return -} - -func (oais *OpenAIService) handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, args, prompt string) (string, error) { - log.Debug().Msgf("[handleQuestion called] nothing to do, computing a reply") - - // If there is a message that the LLM already sends as part of the JSON reply, use it - arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) - m, exists := arguments["message"] - if exists { - switch message := m.(type) { - case string: - if message != "" { - log.Debug().Msgf("Reply received from LLM: %s", message) - message = oais.llmbs.Finetune(*config, prompt, message) - log.Debug().Msgf("Reply received from LLM(finetuned): %s", message) - - return message, nil - } - } - } - - log.Debug().Msgf("No action received from LLM, without a message, computing a reply") - // Otherwise ask the LLM to understand the JSON output and the context, and return a message - // Note: This costs (in term of CPU/GPU) another computation - config.Grammar = "" - images := []string{} - for _, m := range input.Messages { - images = append(images, m.StringImages...) 
- } - - resultChannel, _, err := oais.llmbs.Inference(input.Context, &backend.LLMRequest{ - Text: prompt, - Images: images, - RawMessages: input.Messages, // Experimental - }, config, false) - - if err != nil { - log.Error().Msgf("inference setup error: %s", err.Error()) - return "", err - } - - raw := <-resultChannel - if raw.Error != nil { - log.Error().Msgf("inference error: %q", raw.Error.Error()) - return "", err - } - if raw.Value == nil { - log.Warn().Msgf("nil inference response") - return "", nil - } - return oais.llmbs.Finetune(*config, prompt, raw.Value.Response), nil -} - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) - ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - if err := json.Unmarshal([]byte(s), &ss); err != nil { - log.Error().Msgf("error unmarshalling JSON: %s", err.Error()) - return results - } - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - log.Debug().Msgf("ss[function] is not OK!, llm result: %q", llmresult) - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - log.Debug().Msg("ss[arguments] is not OK!") - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - log.Debug().Msgf("unexpected func_name: %+v", func_name) - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - return results -} diff --git a/core/startup/startup.go b/core/startup/startup.go index 92ccaa9d..6298f034 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -4,21 +4,17 @@ import ( "fmt" "os" - "github.com/go-skynet/LocalAI/core" - "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" - openaiendpoint "github.com/go-skynet/LocalAI/core/http/endpoints/openai" // TODO: This is dubious. Fix this when splitting assistant api up. 
"github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/assets" "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" + pkgStartup "github.com/go-skynet/LocalAI/pkg/startup" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) -// (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { -func Startup(opts ...config.AppOption) (*core.Application, error) { +func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { options := config.NewApplicationConfig(opts...) zerolog.SetGlobalLevel(zerolog.InfoLevel) @@ -31,75 +27,68 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { // Make sure directories exists if options.ModelPath == "" { - return nil, fmt.Errorf("options.ModelPath cannot be empty") + return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") } err := os.MkdirAll(options.ModelPath, 0755) if err != nil { - return nil, fmt.Errorf("unable to create ModelPath: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) } if options.ImageDir != "" { err := os.MkdirAll(options.ImageDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create ImageDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) } } if options.AudioDir != "" { err := os.MkdirAll(options.AudioDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create AudioDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) } } if options.UploadDir != "" { err := os.MkdirAll(options.UploadDir, 0755) if err != nil { - return nil, fmt.Errorf("unable to create UploadDir: %q", err) - } - } - if options.ConfigsDir != "" { - err := os.MkdirAll(options.ConfigsDir, 0755) - if err != nil { - return nil, fmt.Errorf("unable to create ConfigsDir: %q", err) + return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) } } - // Load config jsons - utils.LoadConfig(options.UploadDir, openaiendpoint.UploadedFilesFile, &openaiendpoint.UploadedFiles) - utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsConfigFile, &openaiendpoint.Assistants) - utils.LoadConfig(options.ConfigsDir, openaiendpoint.AssistantsFileConfigFile, &openaiendpoint.AssistantFiles) + // + pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) - app := createApplication(options) + cl := config.NewBackendConfigLoader() + ml := model.NewModelLoader(options.ModelPath) - services.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) 
+ configLoaderOpts := options.ToConfigLoaderOptions() - if err := app.BackendConfigLoader.LoadBackendConfigsFromPath(options.ModelPath, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { + if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config files") } if options.ConfigFile != "" { - if err := app.BackendConfigLoader.LoadBackendConfigFile(options.ConfigFile, app.ApplicationConfig.ToConfigLoaderOptions()...); err != nil { + if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config file") } } - if err := app.BackendConfigLoader.Preload(options.ModelPath); err != nil { + if err := cl.Preload(options.ModelPath); err != nil { log.Error().Err(err).Msg("error downloading models") } if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, app.BackendConfigLoader, options.Galleries); err != nil { - return nil, err + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil { + return nil, nil, nil, err } } if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, app.BackendConfigLoader, options.Galleries); err != nil { - return nil, err + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil { + return nil, nil, nil, err } } if options.Debug { - for _, v := range app.BackendConfigLoader.ListBackendConfigs() { - cfg, _ := app.BackendConfigLoader.GetBackendConfig(v) + for _, v := range cl.ListBackendConfigs() { + cfg, _ := cl.GetBackendConfig(v) log.Debug().Msgf("Model: %s (config: %+v)", v, cfg) } } @@ -117,17 +106,17 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - app.ModelLoader.StopAllGRPC() + ml.StopAllGRPC() }() if options.WatchDog { wd := model.NewWatchDog( - app.ModelLoader, + ml, options.WatchDogBusyTimeout, options.WatchDogIdleTimeout, options.WatchDogBusy, options.WatchDogIdle) - app.ModelLoader.SetWatchDog(wd) + ml.SetWatchDog(wd) go wd.Run() go func() { <-options.Context.Done() @@ -137,35 +126,5 @@ func Startup(opts ...config.AppOption) (*core.Application, error) { } log.Info().Msg("core/startup process completed!") - return app, nil -} - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! 
-func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath) - app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Warn().Msg("Unable to initialize LocalAIMetricsService - non-fatal, optional service") - } - - return app + return cl, ml, options, nil } diff --git a/core/state.go b/core/state.go deleted file mode 100644 index cf0d614b..00000000 --- a/core/state.go +++ /dev/null @@ -1,41 +0,0 @@ -package core - -import ( - "github.com/go-skynet/LocalAI/core/backend" - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/pkg/model" -) - -// TODO: Can I come up with a better name or location for this? -// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy -// Perhaps a proper DI system is worth it in the future, but for now keep things simple. -type Application struct { - - // Application-Level Config - ApplicationConfig *config.ApplicationConfig - // ApplicationState *ApplicationState - - // Core Low-Level Services - BackendConfigLoader *config.BackendConfigLoader - ModelLoader *model.ModelLoader - - // Backend Services - EmbeddingsBackendService *backend.EmbeddingsBackendService - ImageGenerationBackendService *backend.ImageGenerationBackendService - LLMBackendService *backend.LLMBackendService - TranscriptionBackendService *backend.TranscriptionBackendService - TextToSpeechBackendService *backend.TextToSpeechBackendService - - // LocalAI System Services - BackendMonitorService *services.BackendMonitorService - GalleryService *services.GalleryService - ListModelsService *services.ListModelsService - LocalAIMetricsService *services.LocalAIMetricsService - OpenAIService *services.OpenAIService -} - -// TODO [NEXT PR?]: Break up ApplicationConfig. 
-// Migrate over stuff that is not set via config at all - especially runtime stuff -type ApplicationState struct { -} diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru b/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru deleted file mode 100644 index c33bafe1..00000000 --- a/examples/bruno/LocalAI Test Requests/llm text/-completions Stream.bru +++ /dev/null @@ -1,25 +0,0 @@ -meta { - name: -completions Stream - type: http - seq: 4 -} - -post { - url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions - body: json - auth: none -} - -headers { - Content-Type: application/json -} - -body:json { - { - "model": "{{DEFAULT_MODEL}}", - "prompt": "function downloadFile(string url, string outputPath) {", - "max_tokens": 256, - "temperature": 0.5, - "stream": true - } -} diff --git a/pkg/concurrency/concurrency.go b/pkg/concurrency/concurrency.go deleted file mode 100644 index 324e8cc5..00000000 --- a/pkg/concurrency/concurrency.go +++ /dev/null @@ -1,135 +0,0 @@ -package concurrency - -import ( - "sync" -) - -// TODO: closeWhenDone bool parameter :: -// It currently is experimental, and therefore exists. -// Is there ever a situation to use false? - -// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of a second type. -// mappingFn allows the caller to convert from the input type to the output type -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsRawMerger[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan IndividualResultType, outputChannel chan<- OutputResultType, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { - var wg sync.WaitGroup - wg.Add(len(individualResultChannels)) - mergingFn := func(c <-chan IndividualResultType) { - for r := range c { - mr, err := mappingFn(r) - if err == nil { - outputChannel <- mr - } - } - wg.Done() - } - for _, irc := range individualResultChannels { - go mergingFn(irc) - } - if closeWhenDone { - go func() { - wg.Wait() - close(outputChannel) - }() - } - - return &wg -} - -// This function is used to merge the results of a slice of channels of a specific result type down to a single result channel of THE SAME TYPE. -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsRawMergerWithoutMapping[ResultType any](individualResultsChannels []<-chan ResultType, outputChannel chan<- ResultType, closeWhenDone bool) *sync.WaitGroup { - return SliceOfChannelsRawMerger(individualResultsChannels, outputChannel, func(v ResultType) (ResultType, error) { return v, nil }, closeWhenDone) -} - -// This function is used to merge the results of a slice of channels of a specific result type down to a single succcess result channel of a second type, and an error channel -// mappingFn allows the caller to convert from the input type to the output type -// This variant is designed to be aware of concurrency.ErrorOr[T], splitting successes from failures. 
-// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsMergerWithErrors[IndividualResultType any, OutputResultType any](individualResultChannels []<-chan ErrorOr[IndividualResultType], successChannel chan<- OutputResultType, errorChannel chan<- error, mappingFn func(IndividualResultType) (OutputResultType, error), closeWhenDone bool) *sync.WaitGroup { - var wg sync.WaitGroup - wg.Add(len(individualResultChannels)) - mergingFn := func(c <-chan ErrorOr[IndividualResultType]) { - for r := range c { - if r.Error != nil { - errorChannel <- r.Error - } else { - mv, err := mappingFn(r.Value) - if err != nil { - errorChannel <- err - } else { - successChannel <- mv - } - } - } - wg.Done() - } - for _, irc := range individualResultChannels { - go mergingFn(irc) - } - if closeWhenDone { - go func() { - wg.Wait() - close(successChannel) - close(errorChannel) - }() - } - return &wg -} - -// This function is used to reduce down the results of a slice of channels of a specific result type down to a single result value of a second type. -// reducerFn allows the caller to convert from the input type to the output type -// if closeWhenDone is set to true, the output channel will be closed when all individual result channels of the slice have been closed - otherwise it will be left open for future use. -// The same WaitGroup used to trigger that optional closing is returned for any other synchronization purposes. -func SliceOfChannelsReducer[InputResultType any, OutputResultType any](individualResultsChannels []<-chan InputResultType, outputChannel chan<- OutputResultType, - reducerFn func(iv InputResultType, ov OutputResultType) OutputResultType, initialValue OutputResultType, closeWhenDone bool) (wg *sync.WaitGroup) { - wg = &sync.WaitGroup{} - wg.Add(len(individualResultsChannels)) - reduceLock := sync.Mutex{} - reducingFn := func(c <-chan InputResultType) { - for iv := range c { - reduceLock.Lock() - initialValue = reducerFn(iv, initialValue) - reduceLock.Unlock() - } - wg.Done() - } - for _, irc := range individualResultsChannels { - go reducingFn(irc) - } - go func() { - wg.Wait() - outputChannel <- initialValue - if closeWhenDone { - close(outputChannel) - } - }() - return wg -} - -// This function is primarily designed to be used in combination with the above utility functions. -// A slice of input result channels of a specific type is provided, along with a function to map those values to another type -// A slice of output result channels is returned, where each value is mapped as it comes in. -// The order of the slice will be retained. 
-func SliceOfChannelsTransformer[InputResultType any, OutputResultType any](inputChanels []<-chan InputResultType, mappingFn func(v InputResultType) OutputResultType) (outputChannels []<-chan OutputResultType) { - rawOutputChannels := make([]<-chan OutputResultType, len(inputChanels)) - - transformingFn := func(ic <-chan InputResultType, oc chan OutputResultType) { - for iv := range ic { - oc <- mappingFn(iv) - } - close(oc) - } - - for ci, c := range inputChanels { - roc := make(chan OutputResultType) - go transformingFn(c, roc) - rawOutputChannels[ci] = roc - } - - outputChannels = rawOutputChannels - return -} diff --git a/pkg/concurrency/concurrency_test.go b/pkg/concurrency/concurrency_test.go deleted file mode 100644 index fedd74be..00000000 --- a/pkg/concurrency/concurrency_test.go +++ /dev/null @@ -1,101 +0,0 @@ -package concurrency_test - -// TODO: noramlly, these go in utils_tests, right? Why does this cause problems only in pkg/utils? - -import ( - "fmt" - "slices" - - . "github.com/go-skynet/LocalAI/pkg/concurrency" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -var _ = Describe("utils/concurrency tests", func() { - It("SliceOfChannelsReducer works", func() { - individualResultsChannels := []<-chan int{} - initialValue := 0 - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - finalResultChannel := make(chan int) - wg := SliceOfChannelsReducer[int, int](individualResultsChannels, finalResultChannel, func(input int, val int) int { - return val + input - }, initialValue, true) - - Expect(wg).ToNot(BeNil()) - - result := <-finalResultChannel - - Expect(result).ToNot(Equal(0)) - Expect(result).To(Equal(18)) - }) - - It("SliceOfChannelsRawMergerWithoutMapping works", func() { - individualResultsChannels := []<-chan int{} - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - outputChannel := make(chan int) - wg := SliceOfChannelsRawMergerWithoutMapping(individualResultsChannels, outputChannel, true) - Expect(wg).ToNot(BeNil()) - outputSlice := []int{} - for v := range outputChannel { - outputSlice = append(outputSlice, v) - } - Expect(len(outputSlice)).To(Equal(9)) - slices.Sort(outputSlice) - Expect(outputSlice[0]).To(BeZero()) - Expect(outputSlice[3]).To(Equal(1)) - Expect(outputSlice[8]).To(Equal(6)) - }) - - It("SliceOfChannelsTransformer works", func() { - individualResultsChannels := []<-chan int{} - for i := 0; i < 3; i++ { - c := make(chan int) - go func(i int, c chan int) { - for ii := 1; ii < 4; ii++ { - c <- (i * ii) - } - close(c) - }(i, c) - individualResultsChannels = append(individualResultsChannels, c) - } - Expect(len(individualResultsChannels)).To(Equal(3)) - mappingFn := func(i int) string { - return fmt.Sprintf("$%d", i) - } - - outputChannels := SliceOfChannelsTransformer(individualResultsChannels, mappingFn) - Expect(len(outputChannels)).To(Equal(3)) - rSlice := []string{} - for ii := 1; ii < 4; ii++ { - for i := 0; i < 3; i++ { - res := <-outputChannels[i] - rSlice = append(rSlice, res) - } - } - slices.Sort(rSlice) - Expect(rSlice[0]).To(Equal("$0")) - Expect(rSlice[3]).To(Equal("$1")) - 
Expect(rSlice[8]).To(Equal("$6")) - }) -}) diff --git a/pkg/concurrency/types.go b/pkg/concurrency/types.go deleted file mode 100644 index 76081ba3..00000000 --- a/pkg/concurrency/types.go +++ /dev/null @@ -1,6 +0,0 @@ -package concurrency - -type ErrorOr[T any] struct { - Value T - Error error -} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 49a6b1bd..8fb8c39d 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index c0b4bc34..0af5d94f 100644 --- a/pkg/grpc/base/base.go +++ b/pkg/grpc/base/base.go @@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error { return fmt.Errorf("unimplemented") } -func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) { - return schema.TranscriptionResult{}, fmt.Errorf("unimplemented") +func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) { + return schema.Result{}, fmt.Errorf("unimplemented") } func (llm *Base) TTS(*pb.TTSRequest) error { diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 0e0e56c7..882db12a 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp return client.TTS(ctx, in, opts...) } -func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { +func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques if err != nil { return nil, err } - tresult := &schema.TranscriptionResult{} + tresult := &schema.Result{} for _, s := range res.Segments { tks := []int{} for _, t := range s.Tokens { diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index b4ba4884..73b185a3 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc. 
return e.s.TTS(ctx, in) } -func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) { +func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) { r, err := e.s.AudioTranscription(ctx, in) if err != nil { return nil, err } - tr := &schema.TranscriptionResult{} + tr := &schema.Result{} for _, s := range r.Segments { var tks []int for _, t := range s.Tokens { diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go index aa7a3fbc..4d06544d 100644 --- a/pkg/grpc/interface.go +++ b/pkg/grpc/interface.go @@ -15,7 +15,7 @@ type LLM interface { Load(*pb.ModelOptions) error Embeddings(*pb.PredictOptions) ([]float32, error) GenerateImage(*pb.GenerateImageRequest) error - AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) + AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) TTS(*pb.TTSRequest) error TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error) Status() (pb.StatusResponse, error) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 617d8f62..5d9808a4 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -81,7 +81,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if _, err := os.Stat(uri); err == nil { serverAddress, err := getFreeAddress() if err != nil { - return "", fmt.Errorf("%s failed allocating free ports: %s", backend, err.Error()) + return "", fmt.Errorf("failed allocating free ports: %s", err.Error()) } // Make sure the process is executable if err := ml.startProcess(uri, o.model, serverAddress); err != nil { @@ -134,7 +134,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string if !ready { log.Debug().Msgf("GRPC Service NOT ready") - return "", fmt.Errorf("%s grpc service not ready", backend) + return "", fmt.Errorf("grpc service not ready") } options := *o.gRPCOptions @@ -145,10 +145,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options) if err != nil { - return "", fmt.Errorf("\"%s\" could not load model: %w", backend, err) + return "", fmt.Errorf("could not load model: %w", err) } if !res.Success { - return "", fmt.Errorf("\"%s\" could not load model (no success): %s", backend, res.Message) + return "", fmt.Errorf("could not load model (no success): %s", res.Message) } return client, nil diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go new file mode 100644 index 00000000..b09516a7 --- /dev/null +++ b/pkg/startup/model_preload.go @@ -0,0 +1,85 @@ +package startup + +import ( + "errors" + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/embedded" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +// PreloadModelsConfigurations will preload models from the given list of URLs +// It will download the model if it is not already present in the model path +// It will also try to resolve if the model is an embedded model YAML configuration +func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, models ...string) { + for _, url := range models { + + // As a best effort, try to resolve the model from the remote library + // if it's not resolved we try with the other method below + if modelLibraryURL != "" { + lib, err := 
embedded.GetRemoteLibraryShorteners(modelLibraryURL)
+			if err == nil {
+				if lib[url] != "" {
+					log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
+					url = lib[url]
+				}
+			}
+		}
+
+		url = embedded.ModelShortURL(url)
+		switch {
+		case embedded.ExistsInModelsLibrary(url):
+			modelYAML, err := embedded.ResolveContent(url)
+			// If we resolve something, just save it to disk and continue
+			if err != nil {
+				log.Error().Err(err).Msg("error resolving model content")
+				continue
+			}
+
+			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
+			md5Name := utils.MD5(url)
+			modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+			if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+				log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+			}
+		case downloader.LooksLikeURL(url):
+			log.Debug().Msgf("[startup] resolved model to download: %s", url)
+
+			// md5 of model name
+			md5Name := utils.MD5(url)
+
+			// check if file exists
+			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) {
+					utils.DisplayDownloadFunction(fileName, current, total, percent)
+				})
+				if err != nil {
+					log.Error().Err(err).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
+				}
+			}
+		default:
+			if _, err := os.Stat(url); err == nil {
+				log.Debug().Msgf("[startup] resolved local model: %s", url)
+				// copy to modelPath
+				md5Name := utils.MD5(url)
+
+				modelYAML, err := os.ReadFile(url)
+				if err != nil {
+					log.Error().Err(err).Str("filepath", url).Msg("error reading model definition")
+					continue
+				}
+
+				modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
+				if err := os.WriteFile(modelDefinitionFilePath, modelYAML, os.ModePerm); err != nil {
+					log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
+				}
+			} else {
+				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
+			}
+		}
+	}
+}
diff --git a/core/services/model_preload_test.go b/pkg/startup/model_preload_test.go
similarity index 96%
rename from core/services/model_preload_test.go
rename to pkg/startup/model_preload_test.go
index fc65d565..63a8f8b0 100644
--- a/core/services/model_preload_test.go
+++ b/pkg/startup/model_preload_test.go
@@ -1,14 +1,13 @@
-package services_test
+package startup_test
 
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 
+	. "github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 
-	. "github.com/go-skynet/LocalAI/core/services"
-
 	. "github.com/onsi/ginkgo/v2"
 	.
"github.com/onsi/gomega" ) diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go deleted file mode 100644 index 769d8a88..00000000 --- a/pkg/utils/base64.go +++ /dev/null @@ -1,50 +0,0 @@ -package utils - -import ( - "encoding/base64" - "fmt" - "io" - "net/http" - "strings" - "time" -) - -var base64DownloadClient http.Client = http.Client{ - Timeout: 30 * time.Second, -} - -// this function check if the string is an URL, if it's an URL downloads the image in memory -// encodes it in base64 and returns the base64 string - -// This may look weird down in pkg/utils while it is currently only used in core/config -// -// but I believe it may be useful for MQTT as well in the near future, so I'm -// extracting it while I'm thinking of it. -func GetImageURLAsBase64(s string) (string, error) { - if strings.HasPrefix(s, "http") { - // download the image - resp, err := base64DownloadClient.Get(s) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // read the image data into memory - data, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - // encode the image data in base64 - encoded := base64.StdEncoding.EncodeToString(data) - - // return the base64 string - return encoded, nil - } - - // if the string instead is prefixed with "data:image/jpeg;base64,", drop it - if strings.HasPrefix(s, "data:image/jpeg;base64,") { - return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil - } - return "", fmt.Errorf("not valid string") -} From e9f090257c57181ffd411052e6b818ff6f09550f Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Wed, 17 Apr 2024 20:59:05 -0500 Subject: [PATCH 0073/2648] fix: adjust some sources names to match the naming of their repositories (#2061) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 60 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 6715e91e..d236f860 100644 --- a/Makefile +++ b/Makefile @@ -179,20 +179,20 @@ endif all: help ## BERT embeddings -sources/go-bert: - git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert - cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-bert.cpp: + git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp + cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-bert/libgobert.a: sources/go-bert - $(MAKE) -C sources/go-bert libgobert.a +sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp + $(MAKE) -C sources/go-bert.cpp libgobert.a -## go-llama-ggml -sources/go-llama-ggml: - git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml - cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 +## go-llama.cpp +sources/go-llama.cpp: + git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp + cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml - $(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a +sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp + $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) 
libbinding.a ## go-piper sources/go-piper: @@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a ## RWKV -sources/go-rwkv: - git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv - cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 +sources/go-rwkv.cpp: + git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp + cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 -sources/go-rwkv/librwkv.a: sources/go-rwkv - cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. +sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp + cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. ## stable diffusion sources/go-stable-diffusion: @@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream ## whisper sources/whisper.cpp: - git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp + git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && make libwhisper.a -get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream +get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream replace: - $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv + $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go - $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert + $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang + $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp @@ -271,12 +272,12 @@ prepare-sources: get-sources replace ## GENERIC rebuild: ## Rebuilds the project $(GOCMD) clean -cache - $(MAKE) -C sources/go-llama-ggml clean + $(MAKE) -C sources/go-llama.cpp clean $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean - $(MAKE) -C sources/go-rwkv clean + $(MAKE) -C sources/go-rwkv.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean - $(MAKE) -C sources/go-bert clean + $(MAKE) -C sources/go-bert.cpp clean $(MAKE) -C sources/go-piper clean $(MAKE) -C sources/go-tiny-dream clean $(MAKE) build @@ 
-598,8 +599,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/ backend-assets/grpc: protogen-go replace mkdir -p backend-assets/grpc -backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \ +backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc @@ -641,17 +642,16 @@ ifeq ($(BUILD_TYPE),metal) cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/ endif -backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc - $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \ +backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ -backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ +backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc From 502c1eedaa61ae742bfd6eb2e074e6f1180c2c66 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Wed, 17 Apr 2024 22:21:55 -0500 Subject: [PATCH 0074/2648] feat: refactor the dynamic json configs for api_keys and external_backends (#2055) * feat: refactor the dynamic json configs for api_keys and external_backends Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove commented code Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/cli/run.go | 13 +-- core/config/application_config.go | 7 ++ core/startup/config_file_watcher.go | 154 +++++++++++++++++++--------- core/startup/startup.go | 5 + 4 files changed, 117 
insertions(+), 62 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index 0f3ba2de..d729f946 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -2,7 +2,6 @@ package cli import ( "fmt" - "os" "strings" "time" @@ -65,6 +64,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), config.WithConfigsDir(r.ConfigPath), + config.WithDynamicConfigDir(r.LocalaiConfigDir), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), @@ -134,17 +134,6 @@ func (r *RunCMD) Run(ctx *Context) error { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } - // Watch the configuration directory - // If the directory does not exist, we don't watch it - if _, err := os.Stat(r.LocalaiConfigDir); err == nil { - closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options) - defer closeConfigWatcherFn() - - if err != nil { - return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir) - } - } - appHTTP, err := http.App(cl, ml, options) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") diff --git a/core/config/application_config.go b/core/config/application_config.go index 9525553a..77817616 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -22,6 +22,7 @@ type ApplicationConfig struct { AudioDir string UploadDir string ConfigsDir string + DynamicConfigsDir string CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -264,6 +265,12 @@ func WithConfigsDir(configsDir string) AppOption { } } +func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { + return func(o *ApplicationConfig) { + o.DynamicConfigsDir = dynamicConfigsDir + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *ApplicationConfig) { o.ApiKeys = apiKeys diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 9c758e25..5d213df5 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -12,89 +12,143 @@ import ( "github.com/rs/zerolog/log" ) -type WatchConfigDirectoryCloser func() error +type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error -func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error { - fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json")) - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err == nil { - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
-			return nil
-		}
-		return err
-	}
-	return err
+type configFileHandler struct {
+	handlers map[string]fileHandler
+
+	watcher *fsnotify.Watcher
+
+	configDir string
+	appConfig *config.ApplicationConfig
 }
 
-func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
-	fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
-	if err != nil {
-		return err
+// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
+// then we can export it to other packages
+func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
+	c := configFileHandler{
+		handlers:  make(map[string]fileHandler),
+		configDir: appConfig.DynamicConfigsDir,
+		appConfig: appConfig,
 	}
-	// Parse JSON content from the file
-	var fileBackends map[string]string
-	err = json.Unmarshal(fileContent, &fileBackends)
-	if err != nil {
-		return err
+	c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
+	c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
+	return c
+}
+
+func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
+	_, ok := c.handlers[filename]
+	if ok {
+		return fmt.Errorf("handler already registered for file %s", filename)
 	}
-	err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
-	if err != nil {
-		return err
+	c.handlers[filename] = handler
+	if runNow {
+		c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
 	}
 	return nil
 }
 
-var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
-	"api_keys.json":          ReadApiKeysJson,
-	"external_backends.json": ReadExternalBackendsJson,
+func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
+	fileContent, err := os.ReadFile(filename)
+	if err != nil && !os.IsNotExist(err) {
+		log.Error().Err(err).Str("filename", filename).Msg("could not read file")
+	}
+
+	if err = handler(fileContent, c.appConfig); err != nil {
+		log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
+	}
 }
 
-func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
-	if len(configDir) == 0 {
-		return nil, fmt.Errorf("configDir blank")
-	}
+func (c *configFileHandler) Watch() error {
 	configWatcher, err := fsnotify.NewWatcher()
+	c.watcher = configWatcher
 	if err != nil {
-		log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
-	}
-	ret := func() error {
-		configWatcher.Close()
-		return nil
+		log.Fatal().Err(err).Str("configdir", c.configDir).Msg("unable to create a watcher for configuration directory")
 	}
 
 	// Start listening for events.
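+	// Each write, create, or remove of a registered file re-invokes its handler with
+	// the file's current contents (nil once the file is gone): api_keys.json holds a
+	// JSON array of keys, external_backends.json a JSON object mapping names to URIs.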
 	go func() {
 		for {
 			select {
-			case event, ok := <-configWatcher.Events:
+			case event, ok := <-c.watcher.Events:
 				if !ok {
 					return
 				}
-				if event.Has(fsnotify.Write) {
-					for targetName, watchFn := range CONFIG_FILE_UPDATES {
-						if event.Name == targetName {
-							err := watchFn(configDir, appConfig)
-							log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
-						}
+				if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
+					handler, ok := c.handlers[path.Base(event.Name)]
+					if !ok {
+						continue
 					}
+
+					c.callHandler(event.Name, handler)
 				}
-			case _, ok := <-configWatcher.Errors:
+			case err, ok := <-c.watcher.Errors:
 				if !ok {
 					return
 				}
-				log.Error().Err(err).Msg("error encountered while watching config directory")
+				log.Error().Err(err).Msg("config watcher error received")
 			}
 		}
 	}()
 
 	// Add a path.
-	err = configWatcher.Add(configDir)
+	err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
 	if err != nil {
-		return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
+		return fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
 	}
 
-	return ret, nil
+	return nil
+}
+
+// TODO: When we institute graceful shutdown, this should be called
+func (c *configFileHandler) Stop() {
+	c.watcher.Close()
+}
+
+func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
+	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+		log.Debug().Msg("processing api_keys.json")
+
+		if len(fileContent) > 0 {
+			// Parse JSON content from the file
+			var fileKeys []string
+			err := json.Unmarshal(fileContent, &fileKeys)
+			if err != nil {
+				return err
+			}
+
+			appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
+		} else {
+			appConfig.ApiKeys = startupAppConfig.ApiKeys
+		}
+		log.Debug().Msg("api keys loaded from api_keys.json")
+		return nil
+	}
+
+	return handler
+}
+
+func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
+	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
+		log.Debug().Msg("processing external_backends.json")
+
+		if len(fileContent) > 0 {
+			// Parse JSON content from the file
+			var fileBackends map[string]string
+			err := json.Unmarshal(fileContent, &fileBackends)
+			if err != nil {
+				return err
+			}
+			appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
+			err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
+			if err != nil {
+				return err
+			}
+		} else {
+			appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
+		}
+		log.Debug().Msg("external backends loaded from external_backends.json")
+		return nil
+	}
+	return handler
 }
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 6298f034..af92f0e1 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -125,6 +125,11 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 		}()
 	}
 
+	// Watch the configuration directory
+	// If the directory does not exist, we don't watch it
+	configHandler := newConfigFileHandler(options)
+	configHandler.Watch()
+
 	log.Info().Msg("core/startup process completed!")
 	return cl, ml, options, nil
 }
From f9c75d487851749d3b382f64bb3d8a9bf52d94dd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 18 Apr 2024 10:57:24 +0200
Subject: [PATCH 0075/2648] tests: add template tests (#2063)

Signed-off-by: Ettore Di Giacinto

---
 pkg/model/loader_test.go      | 105 ++++++++++++++++++++++++++++++++++
pkg/model/model_suite_test.go | 13 +++++ 2 files changed, 118 insertions(+) create mode 100644 pkg/model/loader_test.go create mode 100644 pkg/model/model_suite_test.go diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go new file mode 100644 index 00000000..4c3c1a88 --- /dev/null +++ b/pkg/model/loader_test.go @@ -0,0 +1,105 @@ +package model_test + +import ( + "github.com/go-skynet/LocalAI/pkg/model" + . "github.com/go-skynet/LocalAI/pkg/model" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }} +{{- if .Content}} +{{.Content }} +{{- end }} +{{- if .FunctionCall}} +{{toJson .FunctionCall}} +{{- end }} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }} +<|im_end|>` + +var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "template": chatML, + "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "user", + RoleName: "user", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "assistant": { + "template": chatML, + "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "A long time ago in a galaxy far, far away...", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_call": { + "template": chatML, + "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "assistant", + RoleName: "assistant", + Content: "", + FunctionCall: map[string]string{"function": "test"}, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, + "function_response": { + "template": chatML, + "expected": "<|im_start|>tool\n\nResponse from tool\n\n<|im_end|>", + "data": model.ChatMessageTemplateData{ + SystemPrompt: "", + Role: "tool", + RoleName: "tool", + Content: "Response from tool", + FunctionCall: nil, + FunctionName: "", + LastMessage: false, + Function: false, + MessageIndex: 0, + }, + }, +} + +var _ = Describe("Templates", func() { + Context("chat message", func() { + modelLoader := NewModelLoader("") + for key := range testMatch { + foo := testMatch[key] + It("renders correctly "+key, func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) +}) diff --git a/pkg/model/model_suite_test.go b/pkg/model/model_suite_test.go new file mode 100644 index 00000000..6fa9c004 --- /dev/null +++ b/pkg/model/model_suite_test.go @@ -0,0 +1,13 @@ +package model_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestModel(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI model test") +} From 8f2681f90420b4818ee270b4ad7c570ed462b09c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 17:17:33 +0000 Subject: [PATCH 0076/2648] build(deps): bump aiohttp from 3.9.2 to 3.9.4 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2067) build(deps): bump aiohttp Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [aiohttp](https://github.com/aio-libs/aiohttp). Updates `aiohttp` from 3.9.2 to 3.9.4 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.2...v3.9.4) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1e63b0bf..ba7f8429 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.2 +aiohttp==3.9.4 aiosignal==1.3.1 async-timeout==4.0.2 attrs==23.1.0 From 13012cfa70d8440a78d3a9c88500597c8cc8ed98 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:19:36 -0500 Subject: [PATCH 0077/2648] feat: better control of GRPC docker cache (#2070) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 90 ++++++++++++++++++++++ .github/workflows/image-pr.yml | 9 ++- .github/workflows/image.yml | 22 ++++-- .github/workflows/image_build.yml | 15 ++-- Dockerfile | 3 +- 5 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/generate_grpc_cache.yaml diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml new file mode 100644 index 00000000..11abc10a --- /dev/null +++ b/.github/workflows/generate_grpc_cache.yaml @@ -0,0 +1,90 @@ +name: 'generate and publish GRPC docker caches' + +on: +- workflow_dispatch + +concurrency: + group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + generate_caches: + strategy: + matrix: + include: + - grpc-base-image: ubuntu:22.04 + runs-on: 'ubuntu-latest' + platforms: 'linux/amd64' + runs-on: ${{matrix.runs-on}} + steps: + - name: Release space from worker + if: matrix.runs-on == 'ubuntu-latest' + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo 
apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get remove -y microsoft-edge-stable || true + sudo apt-get remove -y firefox || true + sudo apt-get remove -y powershell || true + sudo apt-get remove -y r-base-core || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + sudo rm -rf /usr/share/dotnet || true + sudo rm -rf /opt/ghc || true + sudo rm -rf "/usr/local/share/boost" || true + sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + df -h + + - name: Set up QEMU + uses: docker/setup-qemu-action@master + with: + platforms: all + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@master + + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache GRPC + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + build-args: | + GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} + MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.58.0 + context: . 
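+          # This job never pushes an image: it only builds the "grpc" stage and exports
+          # its layers to the GitHub Actions cache (ignore-error keeps a cache outage non-fatal).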
+ file: ./Dockerfile + cache-to: type=gha,ignore-error=true + target: grpc + platforms: ${{ matrix.platforms }} + push: false \ No newline at end of file diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index b703b16d..9c4fece7 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -22,6 +22,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} @@ -61,12 +62,14 @@ jobs: ffmpeg: 'false' image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -85,6 +88,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} @@ -102,11 +106,12 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -122,4 +127,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" \ No newline at end of file + makeflags: "--jobs=4 --output-sync=target" \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index d2607579..255c1c65 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -26,6 +26,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} @@ -129,6 +130,7 @@ jobs: image-type: 'extras' aio: "-aio-gpu-hipblas" base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" latest-image: 'latest-gpu-hipblas' latest-image-aio: 'latest-aio-gpu-hipblas' runs-on: 'arc-runner-set' @@ -140,12 +142,14 @@ jobs: ffmpeg: 'false' image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -158,6 +162,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg' ffmpeg: 'true' image-type: 'extras' @@ -171,6 +176,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" 
tag-suffix: '-sycl-f16-core' ffmpeg: 'false' image-type: 'core' @@ -180,6 +186,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' image-type: 'core' @@ -189,6 +196,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -198,6 +206,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -210,6 +219,7 @@ jobs: ffmpeg: 'true' image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - build-type: 'hipblas' @@ -219,6 +229,7 @@ jobs: ffmpeg: 'false' image-type: 'core' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" + grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" @@ -236,6 +247,7 @@ jobs: runs-on: ${{ matrix.runs-on }} aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} latest-image-aio: ${{ matrix.latest-image-aio }} @@ -258,7 +270,7 @@ jobs: aio: "-aio-cpu" latest-image: 'latest-cpu' latest-image-aio: 'latest-aio-cpu' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -269,7 +281,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -280,7 +292,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -291,7 +303,7 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -302,4 +314,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=5 --output-sync=target" + makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index b0684a4c..b06100ff 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -6,6 +6,10 @@ on: inputs: base-image: description: 'Base image' + required: true + type: string + grpc-base-image: + description: 'GRPC Base image, must be a compatible image with base-image' required: false default: '' type: string @@ -57,7 +61,7 @@ on: makeflags: description: 'Make Flags' required: false - default: '--jobs=3 --output-sync=target' + default: '--jobs=4 --output-sync=target' type: string aio: description: 'AIO Image Name' @@ -201,15 +205,16 @@ jobs: uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache 
and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | - IMAGE_TYPE=${{ inputs.image-type }} - BASE_IMAGE=${{ inputs.base-image }} - MAKEFLAGS=${{ inputs.makeflags }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . file: ./Dockerfile cache-from: type=gha - cache-to: type=gha,ignore-error=true target: grpc platforms: ${{ inputs.platforms }} push: false diff --git a/Dockerfile b/Dockerfile index 397fbe22..805ac3a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ ARG IMAGE_TYPE=extras ARG BASE_IMAGE=ubuntu:22.04 +ARG GRPC_BASE_IMAGE=${BASE_IMAGE} # extras or core FROM ${BASE_IMAGE} as requirements-core @@ -104,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \ ################################### ################################### -FROM ${BASE_IMAGE} as grpc +FROM ${GRPC_BASE_IMAGE} as grpc ARG MAKEFLAGS ARG GRPC_VERSION=v1.58.0 From bbea62b907db917b8ad7036d06b828da48269bf8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 18 Apr 2024 22:43:12 +0200 Subject: [PATCH 0078/2648] feat(functions): support models with no grammar, add tests (#2068) Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 10 +- core/http/endpoints/openai/chat.go | 131 ++++++------------ core/http/endpoints/openai/completion.go | 4 +- core/http/endpoints/openai/request.go | 4 +- core/schema/openai.go | 14 +- pkg/{grammar => functions}/functions.go | 2 +- .../functions_suite_test.go} | 2 +- pkg/{grammar => functions}/functions_test.go | 4 +- .../grammar_json_schema.go} | 2 +- .../grammar_json_schema_test.go} | 4 +- pkg/functions/parse.go | 108 +++++++++++++++ pkg/functions/parse_test.go | 85 ++++++++++++ pkg/model/loader.go | 4 +- 13 files changed, 255 insertions(+), 119 deletions(-) rename pkg/{grammar => functions}/functions.go (98%) rename pkg/{grammar/grammar_suite_test.go => functions/functions_suite_test.go} (90%) rename pkg/{grammar => functions}/functions_test.go (96%) rename pkg/{grammar/json_schema.go => functions/grammar_json_schema.go} (99%) rename pkg/{grammar/json_schema_test.go => functions/grammar_json_schema_test.go} (98%) create mode 100644 pkg/functions/parse.go create mode 100644 pkg/functions/parse_test.go diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 81c92d01..1161cf9f 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -12,6 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/functions" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" @@ -39,7 +40,7 @@ type BackendConfig struct { InputToken [][]int `yaml:"-"` functionCallString, functionCallNameString string `yaml:"-"` - FunctionsConfig Functions `yaml:"function"` + FunctionsConfig functions.FunctionsConfig `yaml:"function"` FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) 
@@ -157,13 +158,6 @@ type AutoGPTQ struct { UseFastTokenizer bool `yaml:"use_fast_tokenizer"` } -type Functions struct { - DisableNoAction bool `yaml:"disable_no_action"` - NoActionFunctionName string `yaml:"no_action_function_name"` - NoActionDescriptionName string `yaml:"no_action_description_name"` - ParallelCalls bool `yaml:"parallel_calls"` -} - type TemplateConfig struct { Chat string `yaml:"chat"` ChatMessage string `yaml:"chat_message"` diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 36d1142b..9adba8ea 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -11,9 +11,8 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" - "github.com/go-skynet/LocalAI/pkg/utils" "github.com/gofiber/fiber/v2" "github.com/google/uuid" "github.com/rs/zerolog/log" @@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup return true }) - results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls) - noActionToRun := len(results) > 0 && results[0].name == noAction + results := functions.ParseFunctionCall(result, config.FunctionsConfig) + noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { case noActionToRun: @@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } responses <- initialMessage - result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt) + result, err := handleQuestion(config, req, ml, startupOptions, results, prompt) if err != nil { log.Error().Err(err).Msg("error handling question") return @@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup default: for i, ss := range results { - name, args := ss.name, ss.arguments + name, args := ss.Name, ss.Arguments initialMessage := schema.OpenAIResponse{ ID: id, @@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } return func(c *fiber.Ctx) error { - processFunctions := false - funcs := grammar.Functions{} modelFile, input, err := readRequest(c, ml, startupOptions, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } log.Debug().Msgf("Configuration read: %+v", config) + funcs := input.Functions + shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() + // Allow the user to set custom actions via config file // to be "embedded" in each model noActionName := "answer" @@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF + input.Grammar = functions.JSONBNF } config.Grammar = input.Grammar - // process functions if we have any defined or if we have a function call string - if len(input.Functions) > 0 && config.ShouldUseFunctions() { + if shouldUseFn { log.Debug().Msgf("Response needs to process functions") + } - processFunctions = true - - noActionGrammar := grammar.Function{ + switch { + case !config.FunctionsConfig.NoGrammar && shouldUseFn: + noActionGrammar := functions.Function{ Name: 
noActionName, Description: noActionDescription, Parameters: map[string]interface{}{ @@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } // Append the no action function - funcs = append(funcs, input.Functions...) if !config.FunctionsConfig.DisableNoAction { funcs = append(funcs, noActionGrammar) } @@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Update input grammar jsStruct := funcs.ToJSONStructure() config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls) - } else if input.JSONFunctionGrammarObject != nil { + case input.JSONFunctionGrammarObject != nil: config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls) + default: + // Force picking one of the functions by the request + if config.FunctionToCall() != "" { + funcs = funcs.Select(config.FunctionToCall()) + } } + // process functions if we have any defined or if we have a function call string + // functions are not supported in stream mode (yet?) toStream := input.Stream @@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // If we are using the tokenizer template, we don't need to process the messages // unless we are processing functions - if !config.TemplateConfig.UseTokenizerTemplate || processFunctions { - + if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn { suppressConfigSystemPrompt := false mess := []string{} for messageIndex, i := range input.Messages { @@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup templateFile = config.Model } - if config.TemplateConfig.Chat != "" && !processFunctions { + if config.TemplateConfig.Chat != "" && !shouldUseFn { templateFile = config.TemplateConfig.Chat } - if config.TemplateConfig.Functions != "" && processFunctions { + if config.TemplateConfig.Functions != "" && shouldUseFn { templateFile = config.TemplateConfig.Functions } @@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } log.Debug().Msgf("Prompt (after templating): %s", predInput) - if processFunctions { + if shouldUseFn && config.Grammar != "" { log.Debug().Msgf("Grammar: %+v", config.Grammar) } } @@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup responses := make(chan schema.OpenAIResponse) - if !processFunctions { + if !shouldUseFn { go process(predInput, input, config, ml, responses) } else { go processTools(noActionName, predInput, input, config, ml, responses) @@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // no streaming mode default: result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) { - if !processFunctions { + if !shouldUseFn { // no function is called, just reply and use stop as finish reason *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}}) return } - results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls) - noActionsToRun := len(results) > 0 && results[0].name == noActionName + results := functions.ParseFunctionCall(s, config.FunctionsConfig) + noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { case noActionsToRun: - result, err := handleQuestion(config, input, ml, startupOptions, 
results[0].arguments, predInput) + result, err := handleQuestion(config, input, ml, startupOptions, results, predInput) if err != nil { log.Error().Err(err).Msg("error handling question") return @@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } for _, ss := range results { - name, args := ss.name, ss.arguments + name, args := ss.Name, ss.Arguments if len(input.Tools) > 0 { // If we are using tools, we condense the function calls into // a single response choice with all the tools @@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Return the prediction in the response body return c.JSON(resp) } - } } -func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) { +func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) { log.Debug().Msgf("nothing to do, computing a reply") - + arg := "" + if len(funcResults) > 0 { + arg = funcResults[0].Arguments + } // If there is a message that the LLM already sends as part of the JSON reply, use it arguments := map[string]interface{}{} - json.Unmarshal([]byte(args), &arguments) + if err := json.Unmarshal([]byte(arg), &arguments); err != nil { + log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object") + } m, exists := arguments["message"] if exists { switch message := m.(type) { @@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m } return backend.Finetune(*config, prompt, prediction.Response), nil } - -type funcCallResults struct { - name string - arguments string -} - -func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults { - results := []funcCallResults{} - - // TODO: use generics to avoid this code duplication - if multipleResults { - ss := []map[string]interface{}{} - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - func_name, ok := s["function"] - if !ok { - continue - } - args, ok := s["arguments"] - if !ok { - continue - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - } else { - // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- ss := map[string]interface{}{} - // This prevent newlines to break JSON parsing for clients - s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) - log.Debug().Msgf("Function return: %s %+v", s, ss) - - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := ss["function"] - if !ok { - return results - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - return results - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - return results - } - results = append(results, funcCallResults{name: funcName, arguments: string(d)}) - } - - return results -} diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 69923475..bcd46db5 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -12,7 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } if input.ResponseFormat.Type == "json_object" { - input.Grammar = grammar.JSONBNF + input.Grammar = functions.JSONBNF } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 369fb0b8..9a107bab 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -12,7 +12,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/pkg/grammar" + "github.com/go-skynet/LocalAI/pkg/functions" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" @@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque } if input.ToolsChoice != nil { - var toolChoice grammar.Tool + var toolChoice functions.Tool switch content := input.ToolsChoice.(type) { case string: diff --git a/core/schema/openai.go b/core/schema/openai.go index 6aa0f1b0..a251ba68 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -3,7 +3,7 @@ package schema import ( "context" - "github.com/go-skynet/LocalAI/pkg/grammar" + functions "github.com/go-skynet/LocalAI/pkg/functions" ) // APIError provides error information returned by the OpenAI API. 
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct { type OpenAIRequest struct { PredictionOptions - Context context.Context `json:"-"` + Context context.Context `json:"-"` Cancel context.CancelFunc `json:"-"` // whisper @@ -130,11 +130,11 @@ type OpenAIRequest struct { Messages []Message `json:"messages" yaml:"messages"` // A list of available functions to call - Functions []grammar.Function `json:"functions" yaml:"functions"` - FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object + Functions functions.Functions `json:"functions" yaml:"functions"` + FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object - Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"` - ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"` + Tools []functions.Tool `json:"tools,omitempty" yaml:"tools"` + ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"` Stream bool `json:"stream"` @@ -145,7 +145,7 @@ type OpenAIRequest struct { // A grammar to constrain the LLM output Grammar string `json:"grammar" yaml:"grammar"` - JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"` + JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"` Backend string `json:"backend" yaml:"backend"` diff --git a/pkg/grammar/functions.go b/pkg/functions/functions.go similarity index 98% rename from pkg/grammar/functions.go rename to pkg/functions/functions.go index 1038f5e6..d75a2ee3 100644 --- a/pkg/grammar/functions.go +++ b/pkg/functions/functions.go @@ -1,4 +1,4 @@ -package grammar +package functions import ( "encoding/json" diff --git a/pkg/grammar/grammar_suite_test.go b/pkg/functions/functions_suite_test.go similarity index 90% rename from pkg/grammar/grammar_suite_test.go rename to pkg/functions/functions_suite_test.go index 652643b6..8964b1c8 100644 --- a/pkg/grammar/grammar_suite_test.go +++ b/pkg/functions/functions_suite_test.go @@ -1,4 +1,4 @@ -package grammar +package functions import ( "testing" diff --git a/pkg/grammar/functions_test.go b/pkg/functions/functions_test.go similarity index 96% rename from pkg/grammar/functions_test.go rename to pkg/functions/functions_test.go index 6e8a56ed..97953a5e 100644 --- a/pkg/grammar/functions_test.go +++ b/pkg/functions/functions_test.go @@ -1,7 +1,7 @@ -package grammar_test +package functions_test import ( - . "github.com/go-skynet/LocalAI/pkg/grammar" + . "github.com/go-skynet/LocalAI/pkg/functions" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/pkg/grammar/json_schema.go b/pkg/functions/grammar_json_schema.go similarity index 99% rename from pkg/grammar/json_schema.go rename to pkg/functions/grammar_json_schema.go index 76f9778f..01046390 100644 --- a/pkg/grammar/json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -1,4 +1,4 @@ -package grammar +package functions // a golang port of https://github.com/ggerganov/llama.cpp/pull/1887 diff --git a/pkg/grammar/json_schema_test.go b/pkg/functions/grammar_json_schema_test.go similarity index 98% rename from pkg/grammar/json_schema_test.go rename to pkg/functions/grammar_json_schema_test.go index 39d2a4d5..fc9029a8 100644 --- a/pkg/grammar/json_schema_test.go +++ b/pkg/functions/grammar_json_schema_test.go @@ -1,9 +1,9 @@ -package grammar_test +package functions_test import ( "strings" - . 
"github.com/go-skynet/LocalAI/pkg/grammar" + . "github.com/go-skynet/LocalAI/pkg/functions" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go new file mode 100644 index 00000000..5324e8c6 --- /dev/null +++ b/pkg/functions/parse.go @@ -0,0 +1,108 @@ +package functions + +import ( + "encoding/json" + "regexp" + + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" +) + +type FunctionsConfig struct { + DisableNoAction bool `yaml:"disable_no_action"` + NoActionFunctionName string `yaml:"no_action_function_name"` + NoActionDescriptionName string `yaml:"no_action_description_name"` + ParallelCalls bool `yaml:"parallel_calls"` + NoGrammar bool `yaml:"no_grammar"` + ResponseRegex string `yaml:"response_regex"` +} + +type FuncCallResults struct { + Name string + Arguments string +} + +func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults { + multipleResults := functionConfig.ParallelCalls + useGrammars := !functionConfig.NoGrammar + + results := []FuncCallResults{} + + // if no grammar is used, we have to extract function and arguments from the result + if !useGrammars { + // the response is a string that we have to parse + + // We use named regexes here to extract the function name and arguments + // obviously, this expects the LLM to be stable and return correctly formatted JSON + // TODO: optimize this and pre-compile it + var respRegex = regexp.MustCompile(functionConfig.ResponseRegex) + match := respRegex.FindStringSubmatch(llmresult) + result := make(map[string]string) + for i, name := range respRegex.SubexpNames() { + if i != 0 && name != "" && len(match) > i { + result[name] = match[i] + } + } + + // TODO: open point about multiple results and/or mixed with chat messages + // This is not handled as for now, we only expect one function call per response + functionName := result["function"] + if functionName == "" { + return results + } + + return append(results, FuncCallResults{Name: result["function"], Arguments: result["arguments"]}) + } + + // with grammars + // TODO: use generics to avoid this code duplication + if multipleResults { + ss := []map[string]interface{}{} + s := utils.EscapeNewLines(llmresult) + json.Unmarshal([]byte(s), &ss) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + func_name, ok := s["function"] + if !ok { + continue + } + args, ok := s["arguments"] + if !ok { + continue + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + } + results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)}) + } + } else { + // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
+		ss := map[string]interface{}{}
+		// This prevents newlines from breaking JSON parsing for clients
+		s := utils.EscapeNewLines(llmresult)
+		json.Unmarshal([]byte(s), &ss)
+		log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+		// The grammar defines the function name as "function", while OpenAI returns "name"
+		func_name, ok := ss["function"]
+		if !ok {
+			return results
+		}
+		// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
+		args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+		if !ok {
+			return results
+		}
+		d, _ := json.Marshal(args)
+		funcName, ok := func_name.(string)
+		if !ok {
+			return results
+		}
+		results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
+	}
+
+	return results
+}
diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go new file mode 100644 index 00000000..5168a7d1 --- /dev/null +++ b/pkg/functions/parse_test.go @@ -0,0 +1,85 @@
+package functions_test
+
+import (
+	. "github.com/go-skynet/LocalAI/pkg/functions"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LocalAI function parse tests", func() {
+	var functionConfig FunctionsConfig
+
+	BeforeEach(func() {
+		// Default configuration setup
+		functionConfig = FunctionsConfig{
+			ParallelCalls: false,
+			NoGrammar: false,
+			ResponseRegex: `(?P<function>\w+)\s*\((?P<arguments>.*)\)`,
+		}
+	})
+
+	Context("when using grammars and single result expected", func() {
+		It("should parse the function name and arguments correctly", func() {
+			input := `{"function": "add", "arguments": {"x": 5, "y": 3}}`
+			functionConfig.ParallelCalls = false
+			functionConfig.NoGrammar = false
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(1))
+			Expect(results[0].Name).To(Equal("add"))
+			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+		})
+	})
+
+	Context("when not using grammars and regex is needed", func() {
+		It("should extract function name and arguments from the regex", func() {
+			input := `add({"x":5,"y":3})`
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(1))
+			Expect(results[0].Name).To(Equal("add"))
+			Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+		})
+	})
+
+	Context("when having invalid input", func() {
+		It("returns no results when there is no input", func() {
+			input := ""
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+
+			functionConfig.NoGrammar = false
+
+			results = ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+		})
+		It("returns no results when input is invalid", func() {
+			input := "invalid input"
+			functionConfig.NoGrammar = true
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+			functionConfig.NoGrammar = false
+
+			results = ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(0))
+		})
+	})
+	Context("when parallel calls are enabled", func() {
+		It("should handle multiple function calls", func() {
+			input := `[{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]`
+			functionConfig.ParallelCalls = true
+			functionConfig.NoGrammar = false
+
+			results := ParseFunctionCall(input, functionConfig)
+			Expect(results).To(HaveLen(2))
+			Expect(results[0].Name).To(Equal("add"))
+
Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`))
+			Expect(results[1].Name).To(Equal("subtract"))
+			Expect(results[1].Arguments).To(Equal(`{"x":10,"y":7}`))
+		})
+	})
+})
diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 003d8327..f3182940 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -11,7 +11,7 @@ import (
 	"text/template"
 	"github.com/Masterminds/sprig/v3"
-	grammar "github.com/go-skynet/LocalAI/pkg/grammar"
+	"github.com/go-skynet/LocalAI/pkg/functions"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
@@ -25,7 +25,7 @@ type PromptTemplateData struct {
 	SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
 	Input string
 	Instruction string
-	Functions []grammar.Function
+	Functions []functions.Function
 	MessageIndex int
 }

From e9448005a50bf966248ea34fbc0a63c23a43e4fb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 18 Apr 2024 23:30:55 +0200 Subject: [PATCH 0079/2648] :arrow_up: Update ggerganov/llama.cpp (#2051)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile index d236f860..e2bfa594 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
+CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 852316c5a61fa8430299717912a2fd62f23fd572 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 18 Apr 2024 19:52:34 -0500 Subject: [PATCH 0080/2648] fix: move the GRPC cache generation workflow into its own concurrency group (#2071)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index 11abc10a..c6b080b5 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -4,7 +4,7 @@ on:
 - workflow_dispatch

 concurrency:
-  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
   cancel-in-progress: true

 jobs:

From 27ec84827c40a81663ef4df51c5e9e30bbb458c9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 19 Apr 2024 04:40:18 +0200 Subject: [PATCH 0081/2648] refactor(template): isolate and add tests (#2069)

* refactor(template): isolate and add tests

Signed-off-by: Ettore Di Giacinto

--------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Dave Co-authored-by: Dave --- pkg/model/loader.go | 111 +++++------------------------- pkg/model/loader_test.go | 7 +- pkg/templates/cache.go | 103 +++++++++++++++++++++++++++ pkg/templates/cache_test.go | 73 ++++++++++++++++++++ pkg/templates/utils_suite_test.go | 13 ++++ pkg/utils/path.go | 6 ++ 6 files changed, 218 insertions(+), 95 deletions(-) create mode 100644 pkg/templates/cache.go create mode 100644 pkg/templates/cache_test.go create mode 100644 pkg/templates/utils_suite_test.go

diff --git a/pkg/model/loader.go
b/pkg/model/loader.go index f3182940..1b5c9aa0 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -1,18 +1,19 @@ package model

 import (
-	"bytes"
 	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
-	"text/template"
-	"github.com/Masterminds/sprig/v3"
+	"github.com/go-skynet/LocalAI/pkg/templates"
+
 	"github.com/go-skynet/LocalAI/pkg/functions"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+
 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
 )
@@ -42,21 +43,6 @@ type ChatMessageTemplateData struct {
 	LastMessage bool
 }

-// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
-// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
-type TemplateType int
-
-const (
-	ChatPromptTemplate TemplateType = iota
-	ChatMessageTemplate
-	CompletionPromptTemplate
-	EditPromptTemplate
-	FunctionsPromptTemplate
-
-	// The following TemplateType is **NOT** a valid value and MUST be last. It exists to make the sanity integration tests simpler!
-	IntegrationTestTemplate
-)
-
 // new idea: what if we declare a struct of these here, and use a loop to check?

 // TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
@@ -67,7 +53,7 @@ type ModelLoader struct {
 	grpcClients map[string]grpc.Backend
 	models map[string]ModelAddress
 	grpcProcesses map[string]*process.Process
-	templates map[TemplateType]map[string]*template.Template
+	templates *templates.TemplateCache
 	wd *WatchDog
 }

@@ -86,11 +72,10 @@ func NewModelLoader(modelPath string) *ModelLoader {
 		ModelPath: modelPath,
 		grpcClients: make(map[string]grpc.Backend),
 		models: make(map[string]ModelAddress),
-		templates: make(map[TemplateType]map[string]*template.Template),
+		templates: templates.NewTemplateCache(modelPath),
 		grpcProcesses: make(map[string]*process.Process),
 	}

-	nml.initializeTemplateMap()
 	return nml
 }

@@ -99,7 +84,7 @@ func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
 }

 func (ml *ModelLoader) ExistsInModelPath(s string) bool {
-	return existsInPath(ml.ModelPath, s)
+	return utils.ExistsInPath(ml.ModelPath, s)
 }

 func (ml *ModelLoader) ListModels() ([]string, error) {
@@ -194,82 +179,22 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
 	return ""
 }

-func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
+const (
+	ChatPromptTemplate templates.TemplateType = iota
+	ChatMessageTemplate
+	CompletionPromptTemplate
+	EditPromptTemplate
+	FunctionsPromptTemplate
+)
+
+func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) {
 	// TODO: should this check be improved?
if templateType == ChatMessageTemplate { return "", fmt.Errorf("invalid templateType: ChatMessage") } - return ml.evaluateTemplate(templateType, templateName, in) + return ml.templates.EvaluateTemplate(templateType, templateName, in) } func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) { - return ml.evaluateTemplate(ChatMessageTemplate, templateName, messageData) -} - -func existsInPath(path string, s string) bool { - _, err := os.Stat(filepath.Join(path, s)) - return err == nil -} - -func (ml *ModelLoader) initializeTemplateMap() { - // This also seems somewhat clunky as we reference the Test / End of valid data value slug, but it works? - for tt := TemplateType(0); tt < IntegrationTestTemplate; tt++ { - ml.templates[tt] = make(map[string]*template.Template) - } -} - -func (ml *ModelLoader) evaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) { - ml.mu.Lock() - defer ml.mu.Unlock() - - m, ok := ml.templates[templateType][templateName] - if !ok { - // return "", fmt.Errorf("template not loaded: %s", templateName) - loadErr := ml.loadTemplateIfExists(templateType, templateName) - if loadErr != nil { - return "", loadErr - } - m = ml.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked - } - if m == nil { - return "", fmt.Errorf("failed loading a template for %s", templateName) - } - - var buf bytes.Buffer - - if err := m.Execute(&buf, in); err != nil { - return "", err - } - return buf.String(), nil -} - -func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateName string) error { - // Check if the template was already loaded - if _, ok := ml.templates[templateType][templateName]; ok { - return nil - } - - // Check if the model path exists - // skip any error here - we run anyway if a template does not exist - modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName) - - dat := "" - if ml.ExistsInModelPath(modelTemplateFile) { - d, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile)) - if err != nil { - return err - } - dat = string(d) - } else { - dat = templateName - } - - // Parse the template - tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) - if err != nil { - return err - } - ml.templates[templateType][templateName] = tmpl - - return nil + return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData) } diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index 4c3c1a88..e4207b35 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -92,10 +92,13 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac var _ = Describe("Templates", func() { Context("chat message", func() { - modelLoader := NewModelLoader("") + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) for key := range testMatch { foo := testMatch[key] - It("renders correctly "+key, func() { + It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred()) Expect(templated).To(Equal(foo["expected"]), templated) diff --git a/pkg/templates/cache.go b/pkg/templates/cache.go new file mode 100644 index 00000000..9ff55605 --- /dev/null +++ b/pkg/templates/cache.go @@ -0,0 +1,103 @@ +package templates + +import ( + "bytes" + "fmt" + 
"os" + "path/filepath" + "sync" + "text/template" + + "github.com/go-skynet/LocalAI/pkg/utils" + + "github.com/Masterminds/sprig/v3" +) + +// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? +// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go +type TemplateType int + +type TemplateCache struct { + mu sync.Mutex + templatesPath string + templates map[TemplateType]map[string]*template.Template +} + +func NewTemplateCache(templatesPath string) *TemplateCache { + tc := &TemplateCache{ + templatesPath: templatesPath, + templates: make(map[TemplateType]map[string]*template.Template), + } + return tc +} + +func (tc *TemplateCache) initializeTemplateMapKey(tt TemplateType) { + if _, ok := tc.templates[tt]; !ok { + tc.templates[tt] = make(map[string]*template.Template) + } +} + +func (tc *TemplateCache) EvaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) { + tc.mu.Lock() + defer tc.mu.Unlock() + + tc.initializeTemplateMapKey(templateType) + m, ok := tc.templates[templateType][templateName] + if !ok { + // return "", fmt.Errorf("template not loaded: %s", templateName) + loadErr := tc.loadTemplateIfExists(templateType, templateName) + if loadErr != nil { + return "", loadErr + } + m = tc.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked + } + if m == nil { + return "", fmt.Errorf("failed loading a template for %s", templateName) + } + + var buf bytes.Buffer + + if err := m.Execute(&buf, in); err != nil { + return "", err + } + return buf.String(), nil +} + +func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error { + + // Check if the template was already loaded + if _, ok := tc.templates[templateType][templateName]; ok { + return nil + } + + // Check if the model path exists + // skip any error here - we run anyway if a template does not exist + modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName) + + dat := "" + file := filepath.Join(tc.templatesPath, modelTemplateFile) + + // Security check + if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil { + return fmt.Errorf("template file outside path: %s", file) + } + + if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) { + d, err := os.ReadFile(file) + if err != nil { + return err + } + dat = string(d) + } else { + dat = templateName + } + + // Parse the template + tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) + if err != nil { + return err + } + tc.templates[templateType][templateName] = tmpl + + return nil +} diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go new file mode 100644 index 00000000..83af02b2 --- /dev/null +++ b/pkg/templates/cache_test.go @@ -0,0 +1,73 @@ +package templates_test + +import ( + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/pkg/templates" // Update with your module path + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("TemplateCache", func() { + var ( + templateCache *templates.TemplateCache + tempDir string + ) + + BeforeEach(func() { + var err error + tempDir, err = os.MkdirTemp("", "templates") + Expect(err).NotTo(HaveOccurred()) + + // Writing example template files + err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0644) + Expect(err).NotTo(HaveOccurred()) + err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0644) + Expect(err).NotTo(HaveOccurred()) + + templateCache = templates.NewTemplateCache(tempDir) + }) + + AfterEach(func() { + os.RemoveAll(tempDir) // Clean up + }) + + Describe("EvaluateTemplate", func() { + Context("when template is loaded successfully", func() { + It("should evaluate the template correctly", func() { + result, err := templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal("Hello, Gopher!")) + }) + }) + + Context("when template isn't a file", func() { + It("should parse from string", func() { + result, err := templateCache.EvaluateTemplate(1, "{{.Name}}", map[string]string{"Name": "Gopher"}) + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal("Gopher")) + }) + }) + + Context("when template is empty", func() { + It("should return an empty string", func() { + result, err := templateCache.EvaluateTemplate(1, "empty", nil) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal("")) + }) + }) + }) + + Describe("concurrency", func() { + It("should handle multiple concurrent accesses", func(done Done) { + go func() { + _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + }() + go func() { + _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) + }() + close(done) + }, 0.1) // timeout in seconds + }) +}) diff --git a/pkg/templates/utils_suite_test.go b/pkg/templates/utils_suite_test.go new file mode 100644 index 00000000..011ba8f6 --- /dev/null +++ b/pkg/templates/utils_suite_test.go @@ -0,0 +1,13 @@ +package templates_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestTemplates(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Templates test suite") +} diff --git a/pkg/utils/path.go b/pkg/utils/path.go index f95b0138..9982bc1e 100644 --- a/pkg/utils/path.go +++ b/pkg/utils/path.go @@ -2,10 +2,16 @@ package utils import ( "fmt" + "os" "path/filepath" "strings" ) +func ExistsInPath(path string, s string) bool { + _, err := os.Stat(filepath.Join(path, s)) + return err == nil +} + func inTrustedRoot(path string, trustedRoot string) error { for path != "/" { path = filepath.Dir(path) From b2772509b44f2a19bb5d61a19c261b2ea02dc180 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 19 Apr 2024 18:23:44 +0200 Subject: [PATCH 0082/2648] models(llama3): add llama3 to embedded models (#2074) Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 22 +++-- aio/gpu-8g/text-to-text.yaml | 22 +++-- aio/intel/text-to-text.yaml | 22 +++-- embedded/models/hermes-2-pro-mistral.yaml | 22 +++-- embedded/models/llama3-instruct.yaml | 48 +++++++++++ pkg/model/loader_test.go | 99 ++++++++++++++++++++++- 6 files changed, 203 insertions(+), 32 deletions(-) create mode 100644 embedded/models/llama3-instruct.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 6c4ec9e6..cf18f659 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 8d5c84f7..0407bb22 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index a7cb5b4d..f5f93c14 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -7,14 +7,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else 
if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index 7bfa9418..dd18ce6f 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -6,14 +6,22 @@ parameters: template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }}{{end}} - {{- if eq .RoleName "tool" }}{{end }} - {{- if .Content}} - {{.Content}} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + {{- end }} - {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }} - {{- if .FunctionCall }}{{end }} - {{- if eq .RoleName "tool" }}{{end }} <|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml new file mode 100644 index 00000000..d483d2b2 --- /dev/null +++ b/embedded/models/llama3-instruct.yaml @@ -0,0 +1,48 @@ +name: llama3-8b-instruct +mmap: true +parameters: + model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf + +template: + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + function: | + <|start_header_id|>system<|end_header_id|> + + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    Function call:
+  chat: |
+    <|begin_of_text|>{{.Input }}
+    <|start_header_id|>assistant<|end_header_id|>
+  completion: |
+    {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- "<|eot_id|>"
+usage: |
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "llama3-8b-instruct",
+        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+    }'
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index e4207b35..d3956b63 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
 {{- end }}
 <|im_end|>`

-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+	"user": {
+		"template": llama3,
+		"expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role: "user",
+			RoleName: "user",
+			Content: "A long time ago in a galaxy far, far away...",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage: false,
+			Function: false,
+			MessageIndex: 0,
+		},
+	},
+	"assistant": {
+		"template": llama3,
+		"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role: "assistant",
+			RoleName: "assistant",
+			Content: "A long time ago in a galaxy far, far away...",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage: false,
+			Function: false,
+			MessageIndex: 0,
+		},
+	},
+	"function_call": {
+		"template": llama3,
+		"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role: "assistant",
+			RoleName: "assistant",
+			Content: "",
+			FunctionCall: map[string]string{"function": "test"},
+			FunctionName: "",
+			LastMessage: false,
+			Function: false,
+			MessageIndex: 0,
+		},
+	},
+	"function_response": {
+		"template": llama3,
+		"expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role: "tool",
+			RoleName: "tool",
+			Content: "Response from tool",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage: false,
+			Function: false,
+			MessageIndex: 0,
+		},
+	},
+}
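The llama3TestMatch table above feeds EvaluateTemplateForChatMessage; a minimal sketch of driving the same rendering path outside the suite, mirroring what these tests do (the template string and message data are invented):

package main

import (
	"fmt"

	model "github.com/go-skynet/LocalAI/pkg/model"
)

func main() {
	ml := model.NewModelLoader("") // with no model path, the template is parsed from the literal string
	out, err := ml.EvaluateTemplateForChatMessage(
		"<|im_start|>{{.RoleName}}\n{{.Content}}<|im_end|>",
		model.ChatMessageTemplateData{RoleName: "user", Content: "hello"},
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(out) // <|im_start|>user\nhello<|im_end|>
}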
+var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", @@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac } var _ = Describe("Templates", func() { - Context("chat message", func() { + Context("chat message ChatML", func() { var modelLoader *ModelLoader BeforeEach(func() { modelLoader = NewModelLoader("") }) - for key := range testMatch { - foo := testMatch[key] + for key := range chatMLTestMatch { + foo := chatMLTestMatch[key] + It("renders correctly `"+key+"`", func() { + templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) + Expect(err).ToNot(HaveOccurred()) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message llama3", func() { + var modelLoader *ModelLoader + BeforeEach(func() { + modelLoader = NewModelLoader("") + }) + for key := range llama3TestMatch { + foo := llama3TestMatch[key] It("renders correctly `"+key+"`", func() { templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData)) Expect(err).ToNot(HaveOccurred()) From 1e3710193065cf79640cbe10bb0c1440313da6b7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Apr 2024 02:05:16 +0200 Subject: [PATCH 0083/2648] :arrow_up: Update ggerganov/llama.cpp (#2080) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2bfa594..b9af4612 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0d56246f4b9764158525d894b96606f6163c53a8 +CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b9e770864356e82b8720be246e169ba2abedbf08 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Fri, 19 Apr 2024 19:31:15 -0500 Subject: [PATCH 0084/2648] feat: enable polling configs for systems with broken fsnotify (docker volumes on windows) (#2081) * feat: enable polling configs for systems with broken fsnotify (docker volumes on windows) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: update logging to make it clear that the config file is being polled Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/cli/run.go | 16 +++++++++------- core/config/application_config.go | 7 +++++++ core/startup/config_file_watcher.go | 15 +++++++++++++++ docs/content/docs/advanced/advanced-usage.md | 1 + 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index d729f946..02d863cd 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -14,13 +14,14 @@ import ( type RunCMD struct { ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` - ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" 
group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` - ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` - AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"` - UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` - ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` - LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"` + AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. 
piper)" group:"storage"` + UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` + ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` + LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` + LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"` // The alias on this option is there to preserve functionality with the old `--config-file` parameter ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` @@ -65,6 +66,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithUploadDir(r.UploadPath), config.WithConfigsDir(r.ConfigPath), config.WithDynamicConfigDir(r.LocalaiConfigDir), + config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), diff --git a/core/config/application_config.go b/core/config/application_config.go index 77817616..d4adee18 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -23,6 +23,7 @@ type ApplicationConfig struct { UploadDir string ConfigsDir string DynamicConfigsDir string + DynamicConfigsDirPollInterval time.Duration CORS bool PreloadJSONModels string PreloadModelsFromPath string @@ -271,6 +272,12 @@ func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { } } +func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption { + return func(o *ApplicationConfig) { + o.DynamicConfigsDirPollInterval = interval + } +} + func WithApiKeys(apiKeys []string) AppOption { return func(o *ApplicationConfig) { o.ApiKeys = apiKeys diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 5d213df5..5f6834d4 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path" + "time" "github.com/fsnotify/fsnotify" "github.com/go-skynet/LocalAI/core/config" @@ -66,6 +67,20 @@ func (c *configFileHandler) Watch() error { log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory") } + if c.appConfig.DynamicConfigsDirPollInterval > 0 { + log.Debug().Msg("Poll interval set, falling back to polling for configuration changes") + ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval) + go func() { + for { + <-ticker.C + for file, handler := range c.handlers { + log.Debug().Str("file", file).Msg("polling config file") + c.callHandler(file, handler) + } + } + }() + } + // Start listening for events. 
go func() { for { diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 4bd16030..cbf7dba3 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -402,6 +402,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed | --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH | | --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH | | --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR | +| --localai-config-dir-poll-interval | | Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to a time duration to poll the LocalAI Config Dir (example: 1m) | $LOCALAI_CONFIG_DIR_POLL_INTERVAL | | --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE | #### Models Flags
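A quick usage sketch for the polling fallback added above. The flag and environment variable names are taken from the run.go wiring in this patch; the `run` subcommand and the one-minute interval are illustrative choices, not mandated by the commit:

# fall back to polling the dynamic config dir once per minute when fsnotify events are unreliable
LOCALAI_CONFIG_DIR_POLL_INTERVAL=1m ./local-ai run

# equivalent flag form
./local-ai run --localai-config-dir-poll-interval=1m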
From 1038f7469c72e44e19cabaa0af474cb75d2b6121 Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 20 Apr 2024 04:42:02 -0400 Subject: [PATCH 0085/2648] fix: action-tmate: use connect-timeout-seconds and limit-access-to-actor (#2083) fix for action-tmate: connect-timeout-seconds and limit-access-to-actor Signed-off-by: Dave Lee --- .github/workflows/test.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46c4e065..9eb4f084 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,8 +121,10 @@ jobs: PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true tests-aio-container: runs-on: ubuntu-latest @@ -173,8 +175,10 @@ jobs: make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true tests-apple: runs-on: macOS-14 @@ -207,5 +211,7 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 5 \ No newline at end of file + uses: mxschmitt/action-tmate@v3.18 + with: + connect-timeout-seconds: 180 + limit-access-to-actor: true \ No newline at end of file From 8d30b39811fa1a00e9b8443a0b9f1db6e5609b5a Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 20 Apr 2024 03:43:37 -0500 Subject: [PATCH 0086/2648] feat: fiber logs with zerolog and add trace level (#2082) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/cli/cli.go | 2 +- core/cli/run.go | 5 ++--- core/config/application_config.go | 19 ++++++------------ core/http/api.go | 33 +++++++++++++++++++++---------- core/http/api_test.go | 1 - core/startup/startup.go | 6 ------ go.mod | 3 ++- go.sum | 4 ++++ main.go | 11 +++++++---- 9 files changed, 45 insertions(+), 39 deletions(-) diff --git a/core/cli/cli.go b/core/cli/cli.go index 5e757f64..2f2dcd8b 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -4,7 +4,7 @@ import "embed" type Context struct { Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` - LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"` + LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"` // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI BackendAssets embed.FS `kong:"-"` diff --git a/core/cli/run.go b/core/cli/run.go index 02d863cd..16e65725 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -8,6 +8,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http" "github.com/go-skynet/LocalAI/core/startup" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -60,7 +61,7 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithYAMLConfigPreload(r.PreloadModelsConfig), config.WithModelPath(r.ModelsPath), config.WithContextSize(r.ContextSize), - config.WithDebug(*ctx.LogLevel == "debug"), + config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel), config.WithImageDir(r.ImagePath), config.WithAudioDir(r.AudioPath), config.WithUploadDir(r.UploadPath), @@ -70,7 +71,6 @@ func (r *RunCMD) Run(ctx *Context) error { config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), config.WithModelLibraryURL(r.RemoteLibrary), - config.WithDisableMessage(false), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithThreads(r.Threads), @@ -131,7 +131,6 @@ func (r *RunCMD) Run(ctx *Context) error { } cl, ml, options, err := startup.Startup(opts...)
- if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } diff --git a/core/config/application_config.go b/core/config/application_config.go index d4adee18..2d733c1e 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -17,7 +17,7 @@ type ApplicationConfig struct { UploadLimitMB, Threads, ContextSize int DisableWelcomePage bool F16 bool - Debug, DisableMessage bool + Debug bool ImageDir string AudioDir string UploadDir string @@ -57,12 +57,11 @@ type AppOption func(*ApplicationConfig) func NewApplicationConfig(o ...AppOption) *ApplicationConfig { opt := &ApplicationConfig{ - Context: context.Background(), - UploadLimitMB: 15, - Threads: 1, - ContextSize: 512, - Debug: true, - DisableMessage: true, + Context: context.Background(), + UploadLimitMB: 15, + Threads: 1, + ContextSize: 512, + Debug: true, } for _, oo := range o { oo(opt) @@ -236,12 +235,6 @@ func WithDebug(debug bool) AppOption { } } -func WithDisableMessage(disableMessage bool) AppOption { - return func(o *ApplicationConfig) { - o.DisableMessage = disableMessage - } -} - func WithAudioDir(audioDir string) AppOption { return func(o *ApplicationConfig) { o.AudioDir = audioDir diff --git a/core/http/api.go b/core/http/api.go index af38512a..fe8f711c 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -7,7 +7,6 @@ import ( "strings" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/gofiber/swagger" // swagger handler "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" @@ -19,10 +18,13 @@ import ( "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" - "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" + "github.com/gofiber/swagger" // swagger handler + + "github.com/rs/zerolog/log" ) func readAuthHeader(c *fiber.Ctx) string { @@ -59,9 +61,11 @@ func readAuthHeader(c *fiber.Ctx) string { func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { // Return errors as JSON responses app := fiber.New(fiber.Config{ - Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: appConfig.DisableMessage, + Views: renderEngine(), + BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + // We disable the Fiber startup message as it does not conform to structured logging. + // We register a startup log line with connection information in the OnListen hook to keep things user friendly though + DisableStartupMessage: true, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -82,11 +86,20 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi }, }) - if appConfig.Debug { - app.Use(logger.New(logger.Config{ - Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", - })) - } + app.Hooks().OnListen(func(listenData fiber.ListenData) error { + scheme := "http" + if listenData.TLS { + scheme = "https" + } + log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! 
Please connect to the endpoint for API documentation.") + return nil + }) + + // Have Fiber use zerolog like the rest of the application rather than its built-in logger + logger := log.Logger + app.Use(fiberzerolog.New(fiberzerolog.Config{ + Logger: &logger, + })) // Default middleware config diff --git a/core/http/api_test.go b/core/http/api_test.go index 1553ed21..35e0a8bf 100644 --- a/core/http/api_test.go +++ b/core/http/api_test.go @@ -211,7 +211,6 @@ var _ = Describe("API test", func() { commonOpts := []config.AppOption{ config.WithDebug(true), - config.WithDisableMessage(true), } Context("API with ephemeral models", func() { diff --git a/core/startup/startup.go b/core/startup/startup.go index af92f0e1..97882a22 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -10,18 +10,12 @@ import ( "github.com/go-skynet/LocalAI/pkg/assets" "github.com/go-skynet/LocalAI/pkg/model" pkgStartup "github.com/go-skynet/LocalAI/pkg/startup" - "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { options := config.NewApplicationConfig(opts...) - zerolog.SetGlobalLevel(zerolog.InfoLevel) - if options.Debug { - zerolog.SetGlobalLevel(zerolog.DebugLevel) - } - log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) diff --git a/go.mod b/go.mod index 99af8ce7..0bf9aa02 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/otiai10/openaigo v1.6.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 - github.com/rs/zerolog v1.31.0 + github.com/rs/zerolog v1.32.0 github.com/russross/blackfriday v1.6.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 @@ -145,6 +145,7 @@ require ( github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gofiber/contrib/fiberzerolog v1.0.0 github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect diff --git a/go.sum b/go.sum index a421e79c..55fdaf06 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofiber/contrib/fiberzerolog v1.0.0 h1:IB8q+NO2zPNS4VHKde1x5DqtMJ5vGrvDCydnAjlFw3E= +github.com/gofiber/contrib/fiberzerolog v1.0.0/go.mod h1:SOi+Wo7RQlO/HV0jsYTu6uFQy+8ZPTzCZW4fDEKD3l8= github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM= github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc= @@ -281,6 +283,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.31.0
h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= +github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0= +github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg= diff --git a/main.go b/main.go index 8b5696d1..0b40175e 100644 --- a/main.go +++ b/main.go @@ -91,17 +91,20 @@ Version: ${version} switch *cli.CLI.LogLevel { case "error": - log.Info().Msg("Setting logging to error") zerolog.SetGlobalLevel(zerolog.ErrorLevel) + log.Info().Msg("Setting logging to error") case "warn": - log.Info().Msg("Setting logging to warn") zerolog.SetGlobalLevel(zerolog.WarnLevel) + log.Info().Msg("Setting logging to warn") case "info": - log.Info().Msg("Setting logging to info") zerolog.SetGlobalLevel(zerolog.InfoLevel) + log.Info().Msg("Setting logging to info") case "debug": - log.Info().Msg("Setting logging to debug") zerolog.SetGlobalLevel(zerolog.DebugLevel) + log.Debug().Msg("Setting logging to debug") + case "trace": + zerolog.SetGlobalLevel(zerolog.TraceLevel) + log.Trace().Msg("Setting logging to trace") } // Populate the application with the embedded backend assets From b319ed58b026f91f48599c62c85eec5fbbc8764b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Apr 2024 15:22:54 +0200 Subject: [PATCH 0087/2648] models(gallery): add gallery (#2078) Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 2 +- gallery/bert-embeddings.yaml | 15 ++ gallery/index.yaml | 503 +++++++++++++++++++++++++++++++++++ gallery/stablediffusion.yaml | 54 ++++ gallery/tinydream.yaml | 42 +++ gallery/virtual.yaml | 6 + gallery/whisper-base.yaml | 18 ++ main.go | 1 + 8 files changed, 640 insertions(+), 1 deletion(-) create mode 100644 gallery/bert-embeddings.yaml create mode 100644 gallery/index.yaml create mode 100644 gallery/stablediffusion.yaml create mode 100644 gallery/tinydream.yaml create mode 100644 gallery/virtual.yaml create mode 100644 gallery/whisper-base.yaml diff --git a/core/cli/run.go b/core/cli/run.go index 16e65725..42185a28 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -26,7 +26,7 @@ type RunCMD struct { // The alias on this option is there to preserve functionality with the old `--config-file` parameter ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"` - Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"` + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml new file mode 100644 index 00000000..0798bf54 --- /dev/null +++ b/gallery/bert-embeddings.yaml @@ -0,0 +1,15 @@ +name: "bert-embeddings" +license: "Apache 2.0" 
+urls: +- https://huggingface.co/skeskinen/ggml +description: | + Bert model that can be used for embeddings +config_file: | + parameters: + model: bert-MiniLM-L6-v2q4_0.bin + backend: bert-embeddings + embeddings: true +files: +- filename: "bert-MiniLM-L6-v2q4_0.bin" + sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" + uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" \ No newline at end of file diff --git a/gallery/index.yaml b/gallery/index.yaml new file mode 100644 index 00000000..6b882768 --- /dev/null +++ b/gallery/index.yaml @@ -0,0 +1,503 @@ +## Whisper +- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" + name: "whisper-1" + license: other +## Bert embeddings +- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" + name: "bert-embeddings" + license: other +- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" + name: "text-embedding-ada-002" + license: other +## Stable Diffusion +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master + name: stablediffusion + license: other +## Tiny Dream +- url: github:mudler/LocalAI/gallery/tinydream.yaml@master + name: tinydream + license: other +## Piper TTS +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-kathleen-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-kathleen-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ca-upc_ona-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ca-upc_ona-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ca-upc_pau-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ca-upc_pau-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-da-nst_talesyntese-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-da-nst_talesyntese-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-eva_k-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-eva_k-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-karlsson-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-karlsson-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-kerstin-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-kerstin-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz +- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-pavoque-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-pavoque-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-ramona-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-ramona-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-de-thorsten-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-de-thorsten-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-el-gr-rapunzelina-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-el-gr-rapunzelina-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-gb-alan-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-gb-alan-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-gb-southern_english_female-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-gb-southern_english_female-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-amy-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-danny-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-danny-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-kathleen-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-kathleen-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-lessac-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-lessac-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-lessac-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-lessac-medium.tar.gz + uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-libritts-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-libritts-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us-ryan-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us-ryan-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-en-us_lessac + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-en-us_lessac.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-carlfm-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-carlfm-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-mls_10246-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-mls_10246-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-es-mls_9972-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-es-mls_9972-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fi-harri-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fi-harri-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-gilles-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-gilles-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-mls_1840-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-mls_1840-low.tar.gz + 
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-siwis-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-siwis-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-fr-siwis-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-fr-siwis-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-bui-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-bui-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-salka-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-salka-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-steinn-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-steinn-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-is-ugla-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-is-ugla-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-it-riccardo_fasol-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-it-riccardo_fasol-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-iseke-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-iseke-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-issai-high + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-issai-high.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-kk-raya-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-kk-raya-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ne-google-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: 
voice-ne-google-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ne-google-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ne-google-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-mls_5809-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-mls_5809-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-mls_7432-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-mls_7432-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-nathalie-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-nathalie-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-rdh-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-rdh-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-nl-rdh-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-nl-rdh-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-no-talesyntese-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-no-talesyntese-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-pl-mls_6892-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-pl-mls_6892-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-pt-br-edresson-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-pt-br-edresson-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-ru-irinia-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-ru-irinia-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-sv-se-nst-medium + license: other + urls: + - 
https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-sv-se-nst-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-uk-lada-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-uk-lada-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-vi-25hours-single-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-vi-25hours-single-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-vi-vivos-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-vi-vivos-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-zh-cn-huayan-x-low + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-zh-cn-huayan-x-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz + +- url: github:mudler/LocalAI/gallery/virtual.yaml@master + name: voice-zh_CN-huayan-medium + license: other + urls: + - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + files: + - filename: voice-zh_CN-huayan-medium.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml new file mode 100644 index 00000000..c8a0eb8b --- /dev/null +++ b/gallery/stablediffusion.yaml @@ -0,0 +1,54 @@ +name: "stablediffusion-cpp" +license: "BSD-3" +urls: +- https://github.com/EdVince/Stable-Diffusion-NCNN +- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + +description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img +config_file: | + name: stablediffusion-cpp + backend: stablediffusion + parameters: + model: stablediffusion_assets + +files: +- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" +- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" +- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: 
"https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" +- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" +- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" +- filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" +- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" +- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" +- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" +- filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml new file mode 100644 index 00000000..415762de --- /dev/null +++ b/gallery/tinydream.yaml @@ -0,0 +1,42 @@ +name: "tinydream" +license: "BSD-3" +urls: + - https://github.com/symisc/tiny-dream + - https://github.com/symisc/tiny-dream/blob/main/LICENSE + +description: | + An embedded, Header Only, Stable Diffusion C++ implementation +config_file: | + name: tinydream + backend: tinydream + parameters: + model: tinydream_assets + +files: + - filename: "tinydream_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin" + - filename: "tinydream_assets/AutoencoderKL-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param" + - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: 
"https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin" + - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param" + - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin" + sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin" + - filename: "tinydream_assets/RealESRGAN_x4plus_anime.param" + sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param" + - filename: "tinydream_assets/UNetModel-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin" + - filename: "tinydream_assets/UNetModel-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" + - filename: "tinydream_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" \ No newline at end of file diff --git a/gallery/virtual.yaml b/gallery/virtual.yaml new file mode 100644 index 00000000..054c3257 --- /dev/null +++ b/gallery/virtual.yaml @@ -0,0 +1,6 @@ +name: "virtual" + +description: | + A Base model definition + +license: "N/A" \ No newline at end of file diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml new file mode 100644 index 00000000..574dbb13 --- /dev/null +++ b/gallery/whisper-base.yaml @@ -0,0 +1,18 @@ +name: "whisper-base" +license: "MIT" +urls: +- https://github.com/ggerganov/whisper.cpp +- https://huggingface.co/ggerganov/whisper.cpp + +description: | + Port of OpenAI's Whisper model in C/C++ + +config_file: | + backend: whisper + parameters: + model: ggml-whisper-base.bin + +files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/main.go b/main.go index 0b40175e..9976906b 100644 --- a/main.go +++ b/main.go @@ -72,6 +72,7 @@ Version: ${version} kong.Vars{ "basepath": kong.ExpandPath("."), "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", + "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`, "version": internal.PrintableVersion(), }, ) From 03adc1f60d97ae7cd5d3b1e58c5511e36c5f4eba Mon Sep 17 00:00:00 2001 From: Taikono-Himazin Date: Sat, 20 Apr 2024 23:37:02 +0900 Subject: [PATCH 0088/2648] Add tensor_parallel_size setting to vllm setting items (#2085) Signed-off-by: Taikono-Himazin --- backend/backend.proto | 1 + backend/python/vllm/backend_vllm.py | 2 ++ core/backend/options.go | 1 + core/config/backend_config.go | 1 + 4 files changed, 5 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 62e1a1a6..ec01e4a7 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -177,6 +177,7 @@ message ModelOptions { bool EnforceEager = 52; int32 SwapSpace = 53; int32 MaxModelLen = 54; + 
From 03adc1f60d97ae7cd5d3b1e58c5511e36c5f4eba Mon Sep 17 00:00:00 2001 From: Taikono-Himazin Date: Sat, 20 Apr 2024 23:37:02 +0900 Subject: [PATCH 0088/2648] Add tensor_parallel_size setting to the vLLM settings (#2085) Signed-off-by: Taikono-Himazin --- backend/backend.proto | 1 + backend/python/vllm/backend_vllm.py | 2 ++ core/backend/options.go | 1 + core/config/backend_config.go | 1 + 4 files changed, 5 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 62e1a1a6..ec01e4a7 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -177,6 +177,7 @@ message ModelOptions { bool EnforceEager = 52; int32 SwapSpace = 53; int32 MaxModelLen = 54; + int32 TensorParallelSize = 55; string MMProj = 41; diff --git a/backend/python/vllm/backend_vllm.py b/backend/python/vllm/backend_vllm.py index ff0f0b26..2d8b55db 100644 --- a/backend/python/vllm/backend_vllm.py +++ b/backend/python/vllm/backend_vllm.py @@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): engine_args.trust_remote_code = request.TrustRemoteCode if request.EnforceEager: engine_args.enforce_eager = request.EnforceEager + if request.TensorParallelSize: + engine_args.tensor_parallel_size = request.TensorParallelSize if request.SwapSpace != 0: engine_args.swap_space = request.SwapSpace if request.MaxModelLen != 0: diff --git a/core/backend/options.go b/core/backend/options.go index 5b303b05..60cb01ff 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { EnforceEager: c.EnforceEager, SwapSpace: int32(c.SwapSpace), MaxModelLen: int32(c.MaxModelLen), + TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 1161cf9f..a439ee63 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -140,6 +140,7 @@ type LLMConfig struct { EnforceEager bool `yaml:"enforce_eager"` // vLLM SwapSpace int `yaml:"swap_space"` // vLLM MaxModelLen int `yaml:"max_model_len"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` RopeScaling string `yaml:"rope_scaling"` From afa1bca1e367c2a52fd584d95d5a98904cadb353 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Apr 2024 20:20:10 +0200 Subject: [PATCH 0089/2648] fix(llama.cpp): set -1 as default for max tokens (#2087) Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a439ee63..6ca24afa 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -205,15 +205,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { defaultTopP := 0.95 defaultTopK := 40 defaultTemp := 0.9 - defaultMaxTokens := 2048 defaultMirostat := 2 defaultMirostatTAU := 5.0 defaultMirostatETA := 0.1 defaultTypicalP := 1.0 defaultTFZ := 1.0 + defaultInfinity := -1 // Try to offload all GPU layers (if GPU is found) - defaultNGPULayers := 99999999 + defaultHigh := 99999999 trueV := true falseV := false @@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { } if cfg.Maxtokens == nil { - cfg.Maxtokens = &defaultMaxTokens + cfg.Maxtokens = &defaultInfinity } if cfg.Mirostat == nil { @@ -269,7 +269,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.MirostatTAU = &defaultMirostatTAU } if cfg.NGPULayers == nil { - cfg.NGPULayers = &defaultNGPULayers + cfg.NGPULayers = &defaultHigh } if cfg.LowVRAM == nil { From 284ad026b1ce3d2751a51e48e5eea8ea6458e191 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 01:19:57 +0200 Subject: [PATCH 0090/2648] refactor(routes): split routes registration (#2077) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/http/api.go | 135 ++----------------------- core/http/endpoints/localai/welcome.go | 28 +++++ core/http/render.go | 37 ------- core/http/routes/elevenlabs.go | 19 ++++ core/http/routes/localai.go | 64 ++++++++++++
core/http/routes/openai.go | 86 ++++++++++++++++ core/http/routes/welcome.go | 23 +++++ 8 files changed, 227 insertions(+), 167 deletions(-) create mode 100644 core/http/endpoints/localai/welcome.go create mode 100644 core/http/routes/elevenlabs.go create mode 100644 core/http/routes/localai.go create mode 100644 core/http/routes/openai.go create mode 100644 core/http/routes/welcome.go diff --git a/Makefile b/Makefile index b9af4612..ea81b535 100644 --- a/Makefile +++ b/Makefile @@ -714,4 +714,4 @@ docker-image-intel-xpu: .PHONY: swagger swagger: - swag init -g core/http/api.go --output swagger + swag init -g core/http/app.go --output swagger diff --git a/core/http/api.go b/core/http/api.go index fe8f711c..1061627f 100644 --- a/core/http/api.go +++ b/core/http/api.go @@ -8,22 +8,21 @@ import ( "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/core/http/routes" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/core/services" - "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/recover" - "github.com/gofiber/swagger" // swagger handler + // swagger handler "github.com/rs/zerolog/log" ) @@ -175,16 +174,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } - // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) - - app.Get("/version", auth, func(c *fiber.Ctx) error { - return c.JSON(struct { - Version string `json:"version"` - }{Version: internal.PrintableVersion()}) - }) - // Make sure directories exists os.MkdirAll(appConfig.ImageDir, 0755) os.MkdirAll(appConfig.AudioDir, 0755) @@ -197,122 +186,10 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - app.Get("/swagger/*", swagger.HandlerDefault) // default - - welcomeRoute( - app, - cl, - ml, - appConfig, - auth, - ) - - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) - - app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - // Elevenlabs - app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) - - // Stores - sl := model.NewModelLoader("") - app.Post("/stores/set", auth, 
localai.StoresSetEndpoint(sl, appConfig)) - app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) - - // openAI compatible API endpoint - - // chat - app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) - - // edit - app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) - - // assistant - app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - - // files - app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) - - // completion - app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) - - // embeddings 
- app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) - - // audio - app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) - - // images - app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) - - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) - } - - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) - } - - ok := func(c *fiber.Ctx) error { - return c.SendStatus(200) - } - - // Kubernetes health checks - app.Get("/healthz", ok) - app.Get("/readyz", ok) - - // Experimental Backend Statistics Module - backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) - - // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) - - app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) + routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) + routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) // Define a custom 404 handler // Note: keep this at the bottom! diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go new file mode 100644 index 00000000..fd3e6230 --- /dev/null +++ b/core/http/endpoints/localai/welcome.go @@ -0,0 +1,28 @@ +package localai + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/internal" + "github.com/gofiber/fiber/v2" +) + +func WelcomeEndpoint(appConfig *config.ApplicationConfig, + models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + "Models": models, + "ModelsConfig": backendConfigs, + "ApplicationConfig": appConfig, + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/index", summary) + } + } +} diff --git a/core/http/render.go b/core/http/render.go index c5045868..8f1b36c6 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -7,10 +7,7 @@ import ( "net/http" "github.com/Masterminds/sprig/v3" - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - "github.com/go-skynet/LocalAI/internal" - "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" "github.com/russross/blackfriday" @@ -33,40 +30,6 @@ func notFoundHandler(c *fiber.Ctx) error { return nil } -func welcomeRoute( - app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig, - auth func(*fiber.Ctx) error, -) { - if appConfig.DisableWelcomePage { - return - } - - models, _ := ml.ListModels() 
- backendConfigs := cl.GetAllBackendConfigs() - - app.Get("/", auth, func(c *fiber.Ctx) error { - summary := fiber.Map{ - "Title": "LocalAI API - " + internal.PrintableVersion(), - "Version": internal.PrintableVersion(), - "Models": models, - "ModelsConfig": backendConfigs, - "ApplicationConfig": appConfig, - } - - if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { - // The client expects a JSON response - return c.Status(fiber.StatusOK).JSON(summary) - } else { - // Render index - return c.Render("views/index", summary) - } - }) - -} - func renderEngine() *fiberhtml.Engine { engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html") engine.AddFuncMap(sprig.FuncMap()) diff --git a/core/http/routes/elevenlabs.go b/core/http/routes/elevenlabs.go new file mode 100644 index 00000000..e24a19a8 --- /dev/null +++ b/core/http/routes/elevenlabs.go @@ -0,0 +1,19 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterElevenLabsRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + // Elevenlabs + app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig)) + +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go new file mode 100644 index 00000000..2651a53e --- /dev/null +++ b/core/http/routes/localai.go @@ -0,0 +1,64 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/internal" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/swagger" +) + +func RegisterLocalAIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + app.Get("/swagger/*", swagger.HandlerDefault) // default + + // LocalAI API endpoints + galleryService := services.NewGalleryService(appConfig.ModelPath) + galleryService.Start(appConfig.Context, cl) + + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) + app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) + app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) + app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + + app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // Stores + sl := model.NewModelLoader("") + app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig)) + app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig)) + app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig)) + app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig)) + + // Kubernetes 
health checks + ok := func(c *fiber.Ctx) error { + return c.SendStatus(200) + } + + app.Get("/healthz", ok) + app.Get("/readyz", ok) + + app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) + + // Experimental Backend Statistics Module + backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) + + app.Get("/version", auth, func(c *fiber.Ctx) error { + return c.JSON(struct { + Version string `json:"version"` + }{Version: internal.PrintableVersion()}) + }) + +} diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go new file mode 100644 index 00000000..c51ccdcb --- /dev/null +++ b/core/http/routes/openai.go @@ -0,0 +1,86 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterOpenAIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + // openAI compatible API endpoint + + // chat + app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig)) + + // edit + app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig)) + + // assistant + app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) + app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + + // files + app.Post("/v1/files", auth, 
openai.UploadFilesEndpoint(cl, appConfig)) + app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig)) + app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig)) + app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig)) + app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig)) + + // completion + app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig)) + + // embeddings + app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig)) + + // audio + app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig)) + + // images + app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig)) + + if appConfig.ImageDir != "" { + app.Static("/generated-images", appConfig.ImageDir) + } + + if appConfig.AudioDir != "" { + app.Static("/generated-audio", appConfig.AudioDir) + } + + // models + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) +} diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go new file mode 100644 index 00000000..29b9e586 --- /dev/null +++ b/core/http/routes/welcome.go @@ -0,0 +1,23 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterPagesRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + + if !appConfig.DisableWelcomePage { + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs)) + } + +} From 180cd4ccda0753ef1afb2eb07857ec0534ea3366 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 16:34:00 +0200 Subject: [PATCH 0091/2648] fix(llama.cpp-ggml): fixup `max_tokens` for old backend (#2094) fix(llama.cpp-ggml): set 0 as default for `max_tokens` Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 6ca24afa..dfc216dc 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { defaultMirostatETA := 0.1 defaultTypicalP := 1.0 defaultTFZ := 1.0 - defaultInfinity := -1 + defaultZero := 0 // Try to offload all GPU layers (if GPU is found) defaultHigh := 99999999 @@ -254,7 
+254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { } if cfg.Maxtokens == nil { - cfg.Maxtokens = &defaultInfinity + cfg.Maxtokens = &defaultZero } if cfg.Mirostat == nil { From 39814cab32a19fa4a6b88935d4587c6c6bbebe16 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 16:46:13 +0200 Subject: [PATCH 0092/2648] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4c2f68b2..e28e3cb0 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- llama3: https://github.com/mudler/LocalAI/discussions/2076 - Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 - Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 From 66b002458db4ec93133d066326a63585ba236412 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Sun, 21 Apr 2024 18:20:25 +0200 Subject: [PATCH 0093/2648] Transformer Backend: Implementing use_tokenizer_template and stop_prompts options (#2090) * fix regression #1971 fixes regression #1971 introduced by intel_extension_for_transformers==1.4 * UseTokenizerTemplate and StopPrompt Implementation of use_tokenizer_template and stopwords options --- .../transformers/transformers_server.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index c7f1cd75..1b38a956 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -148,7 +148,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): else: device_map="CPU" self.model = OVModelForCausalLM.from_pretrained(model_name, - compile=True, + compile=True, + ov_config={"PERFORMANCE_HINT": "LATENCY"}, device=device_map) self.OV = True else: @@ -212,12 +213,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): set_seed(request.Seed) if request.TopP == 0: request.TopP = 0.9 + + if request.TopK == 0: + request.TopK = 40 max_tokens = 200 if request.Tokens > 0: max_tokens = request.Tokens - inputs = self.tokenizer(request.Prompt, return_tensors="pt") + prompt = request.Prompt + if not request.Prompt and request.UseTokenizerTemplate and request.Messages: + prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) + + eos_token_id = self.tokenizer.eos_token_id + if request.StopPrompts: + eos_token_id = [] + for word in request.StopPrompts: + eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) + + inputs = self.tokenizer(prompt, return_tensors="pt") if self.CUDA: inputs = inputs.to("cuda") if XPU and self.OV == False: @@ -235,7 +249,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): top_k=request.TopK, do_sample=True, attention_mask=inputs["attention_mask"], - eos_token_id=self.tokenizer.eos_token_id, + eos_token_id=eos_token_id, pad_token_id=self.tokenizer.eos_token_id, streamer=streamer) thread=Thread(target=self.model.generate, kwargs=config) @@ -264,7 +278,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): top_k=request.TopK, do_sample=True, attention_mask=inputs["attention_mask"], - eos_token_id=self.tokenizer.eos_token_id, + eos_token_id=eos_token_id, pad_token_id=self.tokenizer.eos_token_id) generated_text = self.tokenizer.batch_decode(outputs[:, 
inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] From 38c9abed8bef6cb9c9b7c29ee1b92f86e5317ec7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 21 Apr 2024 18:35:30 +0200 Subject: [PATCH 0094/2648] :arrow_up: Update ggerganov/llama.cpp (#2089) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ea81b535..761c76d6 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0e4802b2ecbaab04b4f829fde4a3096ca19c84b5 +CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 228bc4903f7eed3d384e1094255a8159153158a2 Mon Sep 17 00:00:00 2001 From: Dave Date: Sun, 21 Apr 2024 16:39:17 -0400 Subject: [PATCH 0095/2648] fix: action-tmate detached (#2092) connect-timeout-seconds works best with `detached: true` Signed-off-by: Dave --- .github/workflows/test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9eb4f084..f50479e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -123,6 +123,7 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 limit-access-to-actor: true @@ -177,6 +178,7 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 limit-access-to-actor: true @@ -213,5 +215,6 @@ jobs: if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.18 with: + detached: true connect-timeout-seconds: 180 - limit-access-to-actor: true \ No newline at end of file + limit-access-to-actor: true From f3f6535aad2c899afbc71b273ebd9282438b7814 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Apr 2024 22:39:28 +0200 Subject: [PATCH 0096/2648] fix: rename fiber entrypoint from http/api to http/app (#2096) Signed-off-by: Ettore Di Giacinto Co-authored-by: Dave --- core/http/{api.go => app.go} | 0 core/http/{api_test.go => app_test.go} | 0 core/http/{apt_suite_test.go => http_suite_test.go} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename core/http/{api.go => app.go} (100%) rename core/http/{api_test.go => app_test.go} (100%) rename core/http/{apt_suite_test.go => http_suite_test.go} (100%) diff --git a/core/http/api.go b/core/http/app.go similarity index 100% rename from core/http/api.go rename to core/http/app.go diff --git a/core/http/api_test.go b/core/http/app_test.go similarity index 100% rename from core/http/api_test.go rename to core/http/app_test.go diff --git a/core/http/apt_suite_test.go b/core/http/http_suite_test.go similarity index 100% rename from core/http/apt_suite_test.go rename to core/http/http_suite_test.go From 220958a87c17cf6f1c82dcb4f3f3f8756ea3881d Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 22 Apr 2024 13:34:59 +0900 Subject: [PATCH 0097/2648] fix: typo in models.go (#2099) --- core/cli/models.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/cli/models.go b/core/cli/models.go index 62ef366b..6615e21d 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -25,7 +25,7 @@ type ModelsInstall struct { } type ModelsCMD struct { 
-	List    ModelsList    `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
+	List    ModelsList    `cmd:"" help:"List the models available in your galleries" default:"withargs"`
 	Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
 }
 

From 729378ca98b66ef84921c8f0eb40208e0c2721a5 Mon Sep 17 00:00:00 2001
From: jtwolfe
Date: Mon, 22 Apr 2024 23:47:51 +1000
Subject: [PATCH 0098/2648] AMD/ROCm Documentation update + formatting fix
 (#2100)

* Update aio-images.md

Signed-off-by: jtwolfe

* Update aio-images.md

Signed-off-by: jtwolfe

* Update aio-images.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

* Update GPU-acceleration.md

Signed-off-by: jtwolfe

---------

Signed-off-by: jtwolfe
---
 .../content/docs/features/GPU-acceleration.md | 139 +++++++++++++++++-
 docs/content/docs/reference/aio-images.md     |  15 +-
 2 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index aa931f07..b382309e 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -12,7 +12,7 @@ Section under construction
 This section contains instruction on how to use LocalAI with GPU acceleration.
 
 {{% alert icon="⚡" context="warning" %}}
-For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
+Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
 
 {{% /alert %}}
 
@@ -110,6 +110,143 @@ llama_model_load_internal: total VRAM used: 1598 MB
 llama_init_from_file: kv self size  =  512.00 MB
 ```
 
+## ROCm (AMD) acceleration
+
+There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices seem to be supported under the current ROCm 6 implementation.
+
+Due to the nature of ROCm it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently if desired; please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
+
+### Requirements
+
+- `ROCm 6.x.x` compatible GPU/accelerator
+- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4)
+- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation.
+
+### Recommendations
+
+- Do not use on a system running Wayland.
+- If running with Xorg, do not use the GPU assigned for compute for desktop rendering.
+- Ensure at least 100GB of free space on the disk hosting the container runtime and storing images prior to installation.
+
+### Limitations
+
+Verification testing of ROCm compatibility with the integrated backends is ongoing.
+Please note the following list of verified backends and devices.
+
+### Verified
+
+The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`.
+
+| Backend | Verified | Devices |
+| ---- | ---- | ---- |
+| llama.cpp | yes | Radeon VII (gfx906) |
+| diffusers | yes | Radeon VII (gfx906) |
+| piper | yes | Radeon VII (gfx906) |
+| whisper | no | none |
+| autogptq | no | none |
+| bark | no | none |
+| coqui | no | none |
+| transformers | no | none |
+| exllama | no | none |
+| exllama2 | no | none |
+| mamba | no | none |
+| petals | no | none |
+| sentencetransformers | no | none |
+| transformers-musicgen | no | none |
+| vall-e-x | no | none |
+| vllm | no | none |
+
+**You can help by expanding this list.**
+
+### System Prep
+
+1. Check that your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
+2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility for ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
+3. Install your chosen version of the `dkms` and `rocm` packages (it is recommended that the native package manager be used for this process on any OS, as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
+4. Deploy. Yes, it's that easy.
+
+#### Setup Example (Docker/containerd)
+
+The following are examples of the ROCm-specific configuration elements required.
+
+```yaml
+# docker-compose.yaml
+    # For full functionality select a non-'core' image; version locking the image is recommended for debug purposes.
+    image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+    environment:
+      - DEBUG=true
+      # If your GPU is not already included in the current list of default targets, the following build details are required.
+      - REBUILD=true
+      - BUILD_TYPE=hipblas
+      - GPU_TARGETS=gfx906 # Example for Radeon VII
+    devices:
+      # AMD GPUs only require the following devices to be passed through to the container for offloading to occur.
+      - /dev/dri
+      - /dev/kfd
+```
+
+The same can also be executed as a `run` command for your container runtime:
+
+```
+docker run \
+ -e DEBUG=true \
+ -e REBUILD=true \
+ -e BUILD_TYPE=hipblas \
+ -e GPU_TARGETS=gfx906 \
+ --device /dev/dri \
+ --device /dev/kfd \
+ quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
+```
+
+Please ensure to add all other required environment variables, port forwardings, etc. to your `compose` file or `run` command.
+
+The rebuild process will take some time to complete when deploying these containers, and it is recommended that you `pull` the image prior to deployment as, depending on the version, these images may be ~20GB in size.
+
+#### Example (k8s) (Advanced Deployment/WIP)
+
+For k8s deployments there is an additional step required before deployment: the deployment of the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
+For any k8s environment, following the documentation provided by AMD for the ROCm project should be successful. If you use rke2 or OpenShift, it is recommended that you deploy the SUSE or Red Hat provided version of this resource to ensure compatibility.
+After this has been completed, the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly unedited.
+
+The following are details of the changes that should be made to ensure proper function.
+While these details may be configurable in the `values.yaml`, development of this Helm chart is ongoing and is subject to change.
+
+The following details indicate the final state of the localai deployment relevant to GPU function.
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {NAME}-local-ai
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - env:
+            - name: HIP_VISIBLE_DEVICES
+              value: '0'
+              # This variable indicates the devices available to the container (0:device1 1:device2 2:device3) etc.
+              # For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
+              # Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
+          ...
+          resources:
+            limits:
+              amd.com/gpu: '1'
+            requests:
+              amd.com/gpu: '1'
+```
+
+This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing and compatibility is not guaranteed.
+
+### Notes
+
+- When installing the ROCm kernel driver on your system, ensure that you are installing an equal or newer version than that which is currently implemented in LocalAI (6.0.0 at time of writing).
+- AMD documentation indicates that this will ensure functionality; however, your mileage may vary depending on the GPU and distro you are using.
+- If you encounter an `Error 413` on attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annotation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
+
 ## Intel acceleration (sycl)
 
 ### Requirements
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 40f01f06..b5253ee4 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -9,13 +9,14 @@ All-In-One images are images that come pre-configured with a set of models and b
 
 In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models.
You can find the table below -| Category | Model name | Real model | -| Text Generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) | -| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) | -| Text generation | `stablediffusion` | `stablediffusion`(CPU) `dreamshaper-8` (GPU) | -| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model | -| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` | -| Embeddings | `text-embedding-ada-002` | | +| Category | Model name | Real model (CPU) | Real model (GPU) | +| ---- | ---- | ---- | ---- | +| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | +| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | +| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | +| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | +| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | +| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | ## Usage From b6f0e80d54f3a0ab50688e0c391258a206f677d5 Mon Sep 17 00:00:00 2001 From: Taikono-Himazin Date: Mon, 22 Apr 2024 23:37:13 +0900 Subject: [PATCH 0099/2648] Update text-generation.md (#2095) Signed-off-by: Taikono-Himazin --- docs/content/docs/features/text-generation.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md index c11894e7..3f3f0b56 100644 --- a/docs/content/docs/features/text-generation.md +++ b/docs/content/docs/features/text-generation.md @@ -257,6 +257,10 @@ parameters: # swap_space: 2 # Uncomment to specify the maximum length of a sequence (including prompt and output) # max_model_len: 32768 +# Uncomment and specify the number of Tensor divisions. +# Allows you to partition and run large models. Performance gains are limited. +# https://github.com/vllm-project/vllm/issues/1435 +# tensor_parallel_size: 2 ``` The backend will automatically download the required files in order to run the model. 
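To make the new option concrete, a minimal sketch of a model definition with tensor parallelism enabled might look as follows. This is not part of the patch: the model name, file name, and the value of `tensor_parallel_size` are illustrative and must match the model you serve and the number of GPUs actually visible to the backend.

```yaml
# Hypothetical model definition for a two-GPU host; not part of this patch.
name: vllm-parallel
backend: vllm
parameters:
    model: "facebook/opt-125m"
# Split the model across two GPUs. Requires at least two visible CUDA devices.
tensor_parallel_size: 2
```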
@@ -356,4 +360,4 @@ template: completion: | {{.Input}} -``` \ No newline at end of file +``` From bd507678be6a45e81e1fb9f96e7620c6c4eb162f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 23 Apr 2024 00:04:57 +0200 Subject: [PATCH 0100/2648] :arrow_up: Update docs version mudler/LocalAI (#2105) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..55eebaeb 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": "null" } From 0d8bf91699a9deee596011cb1c30be29ec680685 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 09:22:58 +0200 Subject: [PATCH 0101/2648] feat: Galleries UI (#2104) * WIP: add models to webui Signed-off-by: Ettore Di Giacinto * Register routes Signed-off-by: Ettore Di Giacinto * fix: don't cache models Signed-off-by: Ettore Di Giacinto * small fixups Signed-off-by: Ettore Di Giacinto * fix: fixup multiple installs (strings.Clone) Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- core/config/backend_config.go | 6 +- core/http/app.go | 6 +- core/http/elements/gallery.go | 171 +++++++++++++++++++++++++ core/http/endpoints/localai/welcome.go | 6 +- core/http/routes/localai.go | 3 +- core/http/routes/ui.go | 107 ++++++++++++++++ core/http/routes/welcome.go | 6 +- core/http/views/models.html | 40 ++++++ core/http/views/partials/head.html | 67 +++++++++- core/http/views/partials/navbar.html | 1 + docs/content/docs/overview.md | 2 +- go.mod | 5 +- go.sum | 2 + main.go | 2 +- pkg/downloader/progress.go | 13 ++ pkg/downloader/uri.go | 4 +- pkg/gallery/models.go | 4 +- pkg/gallery/op.go | 5 +- pkg/startup/model_preload.go | 2 +- 20 files changed, 431 insertions(+), 23 deletions(-) create mode 100644 core/http/elements/gallery.go create mode 100644 core/http/routes/ui.go create mode 100644 core/http/views/models.html diff --git a/README.md b/README.md index e28e3cb0..0b32febd 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) -**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. 
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler). ## 🔥🔥 Hot topics / Roadmap diff --git a/core/config/backend_config.go b/core/config/backend_config.go index dfc216dc..64182e75 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -512,7 +512,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { for i, config := range cl.configs { // Download files and verify their SHA - for _, file := range config.DownloadFiles { + for i, file := range config.DownloadFiles { log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) if err := utils.VerifyPath(file.Filename, modelPath); err != nil { @@ -521,7 +521,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { return err } } @@ -535,7 +535,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status) + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status) if err != nil { return err } diff --git a/core/http/app.go b/core/http/app.go index 1061627f..21652dd9 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -186,10 +186,14 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + galleryService := services.NewGalleryService(appConfig.ModelPath) + galleryService.Start(appConfig.Context, cl) + routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) - routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, auth) + routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth) routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) + routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) // Define a custom 404 handler // Note: keep this at the bottom! 
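The new `core/http/elements` package (introduced just below) builds its HTML with [chasefleming/elem-go](https://github.com/chasefleming/elem-go) instead of templates: nodes are composed as plain Go values, HTMX attributes are ordinary `attrs.Props` entries, and `Render()` serializes the tree to a string. For readers new to the library, here is a minimal, self-contained sketch of that pattern; the card text and endpoint are illustrative only, not part of the patch.

```go
package main

import (
	"fmt"

	"github.com/chasefleming/elem-go"
	"github.com/chasefleming/elem-go/attrs"
)

func main() {
	// Compose a card as a tree of nodes, mirroring the gallery elements below.
	card := elem.Div(
		attrs.Props{"class": "card"},
		elem.H5(attrs.Props{}, elem.Text("Example model")),
		elem.Button(
			attrs.Props{
				// htmx posts to this (illustrative) endpoint when the button is clicked.
				"hx-post": "/browse/install/model/example@localai",
			},
			elem.Text("Install"),
		),
	)
	// Prints the serialized HTML, e.g. <div class="card">...</div>
	fmt.Println(card.Render())
}
```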
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go new file mode 100644 index 00000000..370ca82d --- /dev/null +++ b/core/http/elements/gallery.go @@ -0,0 +1,171 @@ +package elements + +import ( + "fmt" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/go-skynet/LocalAI/pkg/gallery" +) + +func DoneProgress(uid string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Installation completed"), + ), + ).Render() +} + +func ErrorProgress(err string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Error"+err), + ), + ).Render() +} + +func ProgressBar(progress string) string { + return elem.Div(attrs.Props{ + "class": "progress", + "role": "progressbar", + "aria-valuemin": "0", + "aria-valuemax": "100", + "aria-valuenow": "0", + "aria-labelledby": "pblabel", + }, + elem.Div(attrs.Props{ + "id": "pb", + "class": "progress-bar", + "style": "width:" + progress + "%", + }), + ).Render() +} + +func StartProgressBar(uid, progress string) string { + if progress == "" { + progress = "0" + } + return elem.Div(attrs.Props{ + "hx-trigger": "done", + "hx-get": "/browse/job/" + uid, + "hx-swap": "outerHTML", + "hx-target": "this", + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Installing"), + // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. + elem.Div(attrs.Props{ + "hx-get": "/browse/job/progress/" + uid, + "hx-trigger": "every 600ms", + "hx-target": "this", + "hx-swap": "innerHTML", + }, + elem.Raw(ProgressBar(progress)), + ), + ), + ).Render() +} + +func ListModels(models []*gallery.GalleryModel) string { + modelsElements := []elem.Node{} + span := func(s string) elem.Node { + return elem.Span( + attrs.Props{ + "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + }, + elem.Text(s), + ) + } + installButton := func(m *gallery.GalleryModel) elem.Node { + return elem.Button( + attrs.Props{ + "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + // post the Model ID as param + "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), + }, + elem.Text("Install"), + ) + } + + descriptionDiv := func(m *gallery.GalleryModel) elem.Node { + + return elem.Div( + attrs.Props{ + "class": "p-6", + }, + elem.H5( + attrs.Props{ + "class": "mb-2 text-xl font-medium leading-tight", + }, + elem.Text(m.Name), + ), + elem.P( + attrs.Props{ + "class": "mb-4 text-base", + }, + elem.Text(m.Description), + ), + ) + } + + actionDiv := func(m *gallery.GalleryModel) elem.Node { + return elem.Div( + attrs.Props{ + "class": "px-6 pt-4 pb-2", + }, + elem.Span( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + }, + elem.Text("Repository: 
"+m.Gallery.Name), + ), + elem.If(m.Installed, span("Installed"), installButton(m)), + ) + } + + for _, m := range models { + modelsElements = append(modelsElements, + elem.Div( + attrs.Props{ + "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2", + }, + elem.Div( + attrs.Props{ + "class": "p-6", + }, + descriptionDiv(m), + actionDiv(m), + // elem.If(m.Installed, span("Installed"), installButton(m)), + + // elem.If(m.Installed, span("Installed"), span("Not Installed")), + ), + ), + ) + } + + wrapper := elem.Div(attrs.Props{ + "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ", + }, modelsElements...) + + return wrapper.Render() +} diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index fd3e6230..291422c6 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -3,12 +3,16 @@ package localai import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/internal" + "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) func WelcomeEndpoint(appConfig *config.ApplicationConfig, - models []string, backendConfigs []config.BackendConfig) func(*fiber.Ctx) error { + cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { + models, _ := ml.ListModels() + backendConfigs := cl.GetAllBackendConfigs() + summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 2651a53e..6415c894 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -14,13 +14,12 @@ func RegisterLocalAIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, + galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { app.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints - galleryService := services.NewGalleryService(appConfig.ModelPath) - galleryService.Start(appConfig.Context, cl) modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go new file mode 100644 index 00000000..b9ccd89a --- /dev/null +++ b/core/http/routes/ui.go @@ -0,0 +1,107 @@ +package routes + +import ( + "fmt" + "html/template" + "strings" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/elements" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" +) + +func RegisterUIRoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + galleryService *services.GalleryService, + auth func(*fiber.Ctx) error) { + + // Show the Models page + app.Get("/browse", auth, func(c *fiber.Ctx) error { + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + + summary := fiber.Map{ + "Title": "LocalAI API - Models", + "Models": template.HTML(elements.ListModels(models)), + // "ApplicationConfig": appConfig, + } + + // Render index + return c.Render("views/models", 
summary) + }) + + // HTMX: return the model details + // https://htmx.org/examples/active-search/ + app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + } + + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + + filteredModels := []*gallery.GalleryModel{} + for _, m := range models { + if strings.Contains(m.Name, form.Search) { + filteredModels = append(filteredModels, m) + } + } + + return c.SendString(elements.ListModels(filteredModels)) + }) + + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // strings.Clone is required! + + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + op := gallery.GalleryOp{ + Id: uid, + GalleryName: galleryID, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.C <- op + }() + + return c.SendString(elements.StartProgressBar(uid, "0")) + }) + + // https://htmx.org/examples/progress-bar/ + app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { + jobUID := c.Params("uid") + + status := galleryService.GetStatus(jobUID) + if status == nil { + //fmt.Errorf("could not find any status for ID") + return c.SendString(elements.ProgressBar("0")) + } + + if status.Progress == 100 { + c.Set("HX-Trigger", "done") + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + return c.SendString(elements.ErrorProgress(status.Error.Error())) + } + + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) + + app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + return c.SendString(elements.DoneProgress(c.Params("uid"))) + }) +} diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go index 29b9e586..6b600d2d 100644 --- a/core/http/routes/welcome.go +++ b/core/http/routes/welcome.go @@ -13,11 +13,7 @@ func RegisterPagesRoutes(app *fiber.App, appConfig *config.ApplicationConfig, auth func(*fiber.Ctx) error) { - models, _ := ml.ListModels() - backendConfigs := cl.GetAllBackendConfigs() - if !appConfig.DisableWelcomePage { - app.Get("/", auth, localai.WelcomeEndpoint(appConfig, models, backendConfigs)) + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) } - } diff --git a/core/http/views/models.html b/core/http/views/models.html new file mode 100644 index 00000000..63c6bba0 --- /dev/null +++ b/core/http/views/models.html @@ -0,0 +1,40 @@ + + +{{template "views/partials/head" .}} + + +
+    {{template "views/partials/navbar" .}}
+
+    <div class="container mx-auto px-4 flex-grow">
+
+        <div class="header text-center py-12">
+            <h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
+            <p class="mt-4 text-lg">The FOSS alternative to OpenAI, Claude, ...</p>
+            <a href="https://localai.io" target="_blank" class="mt-4 inline-block">Documentation</a>
+        </div>
+
+        <div class="models mt-12">
+            <h2 class="text-center text-3xl font-semibold">Available models from repositories</h2>
+            <input type="search" name="search" placeholder="Search models"
+                   hx-post="/browse/search/models" hx-trigger="input changed delay:500ms, search"
+                   hx-target="#search-results">
+            <div id="search-results">{{.Models}}</div>
+        </div>
+    </div>
+
+    {{template "views/partials/footer" .}}
+ + + diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html index 59cdea33..9dbfecdb 100644 --- a/core/http/views/partials/head.html +++ b/core/http/views/partials/head.html @@ -3,11 +3,76 @@ {{.Title}} - + + + + + \ No newline at end of file diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index c3d3223f..36332ed2 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -9,6 +9,7 @@ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 5224bc49..f0f59494 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -56,7 +56,7 @@ icon = "info" -**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler). +**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler). ## Start LocalAI diff --git a/go.mod b/go.mod index 0bf9aa02..9485383e 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/go-skynet/LocalAI -go 1.21 +go 1.21.1 + +toolchain go1.22.2 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf @@ -71,6 +73,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chasefleming/elem-go v0.25.0 // indirect github.com/containerd/continuity v0.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect diff --git a/go.sum b/go.sum index 55fdaf06..b68834b2 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps= +github.com/chasefleming/elem-go v0.25.0 h1:LYzr1auk39Bh3bdKloArOFV7sOBnOfSOKxsg58eWL0Q= +github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f6vg71RUilJAA4= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= diff --git a/main.go b/main.go index 9976906b..04f13d3f 100644 --- a/main.go +++ b/main.go @@ -72,7 +72,7 @@ Version: ${version} kong.Vars{ "basepath": kong.ExpandPath("."), "remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml", - "galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml"}]`, + 
"galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`, "version": internal.PrintableVersion(), }, ) diff --git a/pkg/downloader/progress.go b/pkg/downloader/progress.go index 6806f586..6cd6132b 100644 --- a/pkg/downloader/progress.go +++ b/pkg/downloader/progress.go @@ -5,6 +5,8 @@ import "hash" type progressWriter struct { fileName string total int64 + fileNo int + totalFiles int written int64 downloadStatus func(string, string, string, float64) hash hash.Hash @@ -16,6 +18,17 @@ func (pw *progressWriter) Write(p []byte) (n int, err error) { if pw.total > 0 { percentage := float64(pw.written) / float64(pw.total) * 100 + if pw.totalFiles > 1 { + // This is a multi-file download + // so we need to adjust the percentage + // to reflect the progress of the whole download + // This is the file pw.fileNo of pw.totalFiles files. We assume that + // the files before successfully downloaded. + percentage = percentage / float64(pw.totalFiles) + if pw.fileNo > 1 { + percentage += float64(pw.fileNo-1) * 100 / float64(pw.totalFiles) + } + } //log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage) } else { diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index b678ae0d..46ccd6a1 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -136,7 +136,7 @@ func removePartialFile(tmpFilePath string) error { return nil } -func DownloadFile(url string, filePath, sha string, downloadStatus func(string, string, string, float64)) error { +func DownloadFile(url string, filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error { url = ConvertURL(url) // Check if the file already exists _, err := os.Stat(filePath) @@ -209,6 +209,8 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string, fileName: tmpFilePath, total: resp.ContentLength, hash: sha256.New(), + fileNo: fileN, + totalFiles: total, downloadStatus: downloadStatus, } _, err = io.Copy(io.MultiWriter(outFile, progress), resp.Body) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 10caedee..59971bbc 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -102,7 +102,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides } // Download files and verify their SHA - for _, file := range config.Files { + for i, file := range config.Files { log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) if err := utils.VerifyPath(file.Filename, basePath); err != nil { @@ -111,7 +111,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides // Create file path filePath := filepath.Join(basePath, file.Filename) - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil { + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } } diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 99796812..73d748bf 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -1,11 +1,12 @@ package gallery type GalleryOp struct { - Req GalleryModel Id string - Galleries []Gallery GalleryName string ConfigURL string + + Req GalleryModel + Galleries []Gallery } type GalleryOpStatus struct { diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index b09516a7..d267d846 100644 --- 
a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -54,7 +54,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model // check if file exists if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - err := downloader.DownloadFile(url, modelDefinitionFilePath, "", func(fileName, current, total string, percent float64) { + err := downloader.DownloadFile(url, modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if err != nil { From 8e36fe9b6fc51c0a13a18302b647655b52fff0aa Mon Sep 17 00:00:00 2001 From: fakezeta Date: Tue, 23 Apr 2024 18:42:17 +0200 Subject: [PATCH 0102/2648] Transformers Backend: max_tokens adherence to OpenAI API (#2108) max token adherence to OpenAI API improve adherence to OpenAI API when max tokens is omitted or equal to 0 in the request --- .../python/transformers/transformers_server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 1b38a956..90053ed5 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -159,6 +159,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization_config=quantization, device_map=device_map, torch_dtype=compute) + if request.ContextSize > 0: + self.max_tokens = request.ContextSize + else: + self.max_tokens = self.model.config.max_position_embeddings + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False @@ -217,10 +222,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.TopK == 0: request.TopK = 40 - max_tokens = 200 - if request.Tokens > 0: - max_tokens = request.Tokens - prompt = request.Prompt if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) @@ -232,6 +233,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) inputs = self.tokenizer(prompt, return_tensors="pt") + + if request.Tokens > 0: + max_tokens = request.Tokens + else: + max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1] + if self.CUDA: inputs = inputs.to("cuda") if XPU and self.OV == False: From 3411e072ca8d5c4a34267287ded4a2ad03bfb36d Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:43:00 -0500 Subject: [PATCH 0103/2648] Fix cleanup sonarqube findings (#2106) * fix: update dockerignore and gitignore to exclude sonarqube work dir Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: remove useless equality check Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use sonarqube Dockerfile recommendations Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .dockerignore | 5 ++++- .gitignore | 3 +++ Dockerfile | 23 +++++++++++----------- core/http/endpoints/openai/assistant.go | 26 ++++++++++++------------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/.dockerignore b/.dockerignore 
index 2c394c48..ea2ea6b2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,4 +5,7 @@ models examples/chatbot-ui/models examples/rwkv/models examples/**/models -Dockerfile* \ No newline at end of file +Dockerfile* + +# SonarQube +.scannerwork \ No newline at end of file diff --git a/.gitignore b/.gitignore index f1f860e9..9338b0c4 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ prepare *.pb.go *pb2.py *pb2_grpc.py + +# SonarQube +.scannerwork \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 805ac3a6..4bc8b35e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG BASE_IMAGE=ubuntu:22.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} # extras or core -FROM ${BASE_IMAGE} as requirements-core +FROM ${BASE_IMAGE} AS requirements-core USER root @@ -24,7 +24,7 @@ RUN apt-get update && \ apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean # Install Go -RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz +RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin # Install grpc compilers @@ -80,7 +80,7 @@ RUN test -n "$TARGETARCH" \ ################################### ################################### -FROM requirements-core as requirements-extras +FROM requirements-core AS requirements-extras RUN apt install -y gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ @@ -105,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \ ################################### ################################### -FROM ${GRPC_BASE_IMAGE} as grpc +FROM ${GRPC_BASE_IMAGE} AS grpc ARG MAKEFLAGS ARG GRPC_VERSION=v1.58.0 @@ -121,16 +121,15 @@ RUN apt-get update && \ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc -RUN cd grpc && \ - mkdir -p cmake/build && \ - cd cmake/build && \ - cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ +WORKDIR /build/grpc/cmake/build + +RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ make ################################### ################################### -FROM requirements-${IMAGE_TYPE} as builder +FROM requirements-${IMAGE_TYPE} AS builder ARG GO_TAGS="stablediffusion tts" ARG GRPC_BACKENDS @@ -168,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build COPY --from=grpc /build/grpc ./grpc/ -RUN cd /build/grpc/cmake/build && make install +WORKDIR /build/grpc/cmake/build +RUN make install # Rebuild with defaults backends +WORKDIR /build RUN make build RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ @@ -288,7 +289,7 @@ RUN mkdir -p /build/models # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ - CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 + CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 VOLUME /build/models EXPOSE 8080 diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index dceb3789..c1efd8bd 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model for i, assistant := range Assistants { if assistant.ID == assistantID { for j, fileId := range assistant.FileIDs { - if fileId == fileId { - Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...) 
+ Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...) - // Check if the file exists in the assistantFiles slice - for i, assistantFile := range AssistantFiles { - if assistantFile.ID == fileId { - // Remove the file from the assistantFiles slice - AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) - utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) - return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ - ID: fileId, - Object: "assistant.file.deleted", - Deleted: true, - }) - } + // Check if the file exists in the assistantFiles slice + for i, assistantFile := range AssistantFiles { + if assistantFile.ID == fileId { + // Remove the file from the assistantFiles slice + AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) + utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) + return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ + ID: fileId, + Object: "assistant.file.deleted", + Deleted: true, + }) } } } From d344daf129e5d4504ce29ada434b6e6b1025ce31 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 18:43:25 +0200 Subject: [PATCH 0104/2648] feat(models-ui): minor visual enhancements (#2109) Show image if present, URL, tags, and better display buttons Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 96 ++++++++++++++++++++++++++++++----- core/http/routes/ui.go | 10 ++-- core/http/views/models.html | 18 +++---- 3 files changed, 96 insertions(+), 28 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 370ca82d..405f42ae 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -86,6 +86,18 @@ func StartProgressBar(uid, progress string) string { ).Render() } +func cardSpan(text, icon string) elem.Node { + return elem.Span( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + }, + elem.I(attrs.Props{ + "class": icon + " pr-2", + }), + elem.Text(text), + ) +} + func ListModels(models []*gallery.GalleryModel) string { modelsElements := []elem.Node{} span := func(s string) elem.Node { @@ -99,10 +111,17 @@ func ListModels(models []*gallery.GalleryModel) string { installButton := func(m *gallery.GalleryModel) elem.Node { return elem.Button( attrs.Props{ - "class": "float-right inline-block rounded bg-primary px-6 pb-2 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", // post the Model ID as param "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), 
}, + elem.I( + attrs.Props{ + "class": "fa-solid fa-download pr-2", + }, + ), elem.Text("Install"), ) } @@ -111,7 +130,7 @@ func ListModels(models []*gallery.GalleryModel) string { return elem.Div( attrs.Props{ - "class": "p-6", + "class": "p-6 text-surface dark:text-white", }, elem.H5( attrs.Props{ @@ -129,42 +148,93 @@ func ListModels(models []*gallery.GalleryModel) string { } actionDiv := func(m *gallery.GalleryModel) elem.Node { + nodes := []elem.Node{ + cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), + } + + if m.License != "" { + nodes = append(nodes, + cardSpan("License: "+m.License, "fas fa-book"), + ) + } + + for _, tag := range m.Tags { + nodes = append(nodes, + cardSpan(tag, "fas fa-tag"), + ) + } + + for i, url := range m.URLs { + nodes = append(nodes, + elem.A( + attrs.Props{ + "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + "href": url, + "target": "_blank", + }, + elem.I(attrs.Props{ + "class": "fas fa-link pr-2", + }), + elem.Text("Link #"+fmt.Sprintf("%d", i+1)), + )) + } + return elem.Div( attrs.Props{ "class": "px-6 pt-4 pb-2", }, - elem.Span( + elem.P( attrs.Props{ - "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + "class": "mb-4 text-base", }, - elem.Text("Repository: "+m.Gallery.Name), + nodes..., ), elem.If(m.Installed, span("Installed"), installButton(m)), ) } for _, m := range models { + + elems := []elem.Node{} + + if m.Icon != "" { + elems = append(elems, + + elem.Div(attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.A(attrs.Props{ + "href": "#!", + // "class": "justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover", + "src": m.Icon, + }), + ), + )) + } + + elems = append(elems, descriptionDiv(m), actionDiv(m)) modelsElements = append(modelsElements, elem.Div( attrs.Props{ - "class": "me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface p-2", + "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2", }, elem.Div( attrs.Props{ - "class": "p-6", + // "class": "p-6", }, - descriptionDiv(m), - actionDiv(m), - // elem.If(m.Installed, span("Installed"), installButton(m)), - - // elem.If(m.Installed, span("Installed"), span("Not Installed")), + elems..., ), ), ) } wrapper := elem.Div(attrs.Props{ - "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-2 ", + "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark", + //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark", }, modelsElements...) 
return wrapper.Render() diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index b9ccd89a..c64ec5ff 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -26,8 +26,9 @@ func RegisterUIRoutes(app *fiber.App, models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) summary := fiber.Map{ - "Title": "LocalAI API - Models", - "Models": template.HTML(elements.ListModels(models)), + "Title": "LocalAI - Models", + "Models": template.HTML(elements.ListModels(models)), + "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, } @@ -49,7 +50,10 @@ func RegisterUIRoutes(app *fiber.App, filteredModels := []*gallery.GalleryModel{} for _, m := range models { - if strings.Contains(m.Name, form.Search) { + if strings.Contains(m.Name, form.Search) || + strings.Contains(m.Description, form.Search) || + strings.Contains(m.Gallery.Name, form.Search) || + strings.Contains(strings.Join(m.Tags, ","), form.Search) { filteredModels = append(filteredModels, m) } } diff --git a/core/http/views/models.html b/core/http/views/models.html index 63c6bba0..be3c1bef 100644 --- a/core/http/views/models.html +++ b/core/http/views/models.html @@ -7,20 +7,14 @@ {{template "views/partials/navbar" .}}
-      Welcome to your LocalAI instance!
-      The FOSS alternative to OpenAI, Claude, ...
-      Documentation
-      Available models from repositories
+      🖼️ Available models from {{ len .Repositories }} repositories
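A quick worked example before the next patch: the multi-file progress accounting added to pkg/downloader/progress.go at the top of this section is easiest to sanity-check with concrete numbers. The sketch below is not part of the patch series; it mirrors the patched logic with a 1-based fileNo, and the sample values are made up.

package main

import "fmt"

// overallPercentage maps one file's progress onto the whole download: each
// file owns an equal 100/totalFiles share, and the files before fileNo are
// counted as already complete.
func overallPercentage(written, total int64, fileNo, totalFiles int) float64 {
	percentage := float64(written) / float64(total) * 100
	if totalFiles > 1 {
		percentage = percentage / float64(totalFiles)
		if fileNo > 1 {
			percentage += float64(fileNo-1) * 100 / float64(totalFiles)
		}
	}
	return percentage
}

func main() {
	// Second file of three, half downloaded: 33.3% for the finished first
	// file plus half of file two's 33.3% share = 50.0%.
	fmt.Printf("%.1f%%\n", overallPercentage(512, 1024, 2, 3))
}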
From: Ettore Di Giacinto
Date: Tue, 23 Apr 2024 19:35:45 +0200
Subject: [PATCH 0105/2648] feat(gallery): add llama3, hermes, phi-3, and others (#2110)

Also adds embeddings and llava models

Signed-off-by: Ettore Di Giacinto
---
 gallery/codellama.yaml            |  21 +
 gallery/dreamshaper.yaml          |  26 ++
 gallery/hermes-2-pro-mistral.yaml |  81 ++++
 gallery/index.yaml                | 668 ++++++++++++++++++++----------
 gallery/llama3-instruct.yaml      |  64 +++
 gallery/llava.yaml                |  32 ++
 gallery/phi-2-chat.yaml           |  50 +++
 gallery/phi-2-orange.yaml         |  33 ++
 gallery/phi-3-chat.yaml           |  31 ++
 gallery/piper.yaml                |  15 +
 gallery/sentencetransformers.yaml |  12 +
 11 files changed, 804 insertions(+), 229 deletions(-)
 create mode 100644 gallery/codellama.yaml
 create mode 100644 gallery/dreamshaper.yaml
 create mode 100644 gallery/hermes-2-pro-mistral.yaml
 create mode 100644 gallery/llama3-instruct.yaml
 create mode 100644 gallery/llava.yaml
 create mode 100644 gallery/phi-2-chat.yaml
 create mode 100644 gallery/phi-2-orange.yaml
 create mode 100644 gallery/phi-3-chat.yaml
 create mode 100644 gallery/piper.yaml
 create mode 100644 gallery/sentencetransformers.yaml

diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml
new file mode 100644
index 00000000..1b773ed6
--- /dev/null
+++ b/gallery/codellama.yaml
@@ -0,0 +1,21 @@
+name: "codellama"
+license: llama2
+
+description: |
+  Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding.
+
+urls:
+- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF
+- https://huggingface.co/meta-llama/CodeLlama-7b-hf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  backend: llama-cpp
+  context_size: 4096
+  f16: true
+  mmap: true
\ No newline at end of file
diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml
new file mode 100644
index 00000000..894ae0cf
--- /dev/null
+++ b/gallery/dreamshaper.yaml
@@ -0,0 +1,26 @@
+name: "dreamshaper"
+icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+license: other
+
+description: |
+  A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This is the DreamShaper model by Lykon.
+
+urls:
+- https://civitai.com/models/4384/dreamshaper
+
+tags:
+- text-to-image
+- stablediffusion
+- sd-1.5
+- gpu
+
+config_file: |
+  backend: diffusers
+  step: 25
+  f16: true
+
+  diffusers:
+    pipeline_type: StableDiffusionPipeline
+    cuda: true
+    enable_parameters: "negative_prompt,num_inference_steps"
+    scheduler_type: "k_dpmpp_2m"
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
new file mode 100644
index 00000000..5a79d5cb
--- /dev/null
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -0,0 +1,81 @@
+name: "hermes-2-pro-mistral"
+icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
+license: apache-2.0
+
+description: |
+  Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.
+
+  This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation.
+
+  Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below.
+
+  This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI
+
+  Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main
+
+urls:
+- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  mmap: true
+  parameters:
+    model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+  template:
+    chat_message: |
+      <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+      {{- if .FunctionCall }}
+      <tool_call>
+      {{- else if eq .RoleName "tool" }}
+      <tool_response>
+      {{- end }}
+      {{- if .Content}}
+      {{.Content }}
+      {{- end }}
+      {{- if .FunctionCall}}
+      {{toJson .FunctionCall}}
+      {{- end }}
+      {{- if .FunctionCall }}
+      </tool_call>
+      {{- else if eq .RoleName "tool" }}
+      </tool_response>
+      {{- end }}
+      <|im_end|>
+    # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+    function: |
+      <|im_start|>system
+      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+      <tools>
+      {{range .Functions}}
+      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+      {{end}}
+      </tools>
+      Use the following pydantic model json schema for each tool call you will make:
+      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+      For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+      <tool_call>
+      {'arguments': <args-dict>, 'name': <function-name>}
+      </tool_call>
+      <|im_end|>
+      {{.Input -}}
+      <|im_start|>assistant
+      <tool_call>
+    chat: |
+      {{.Input -}}
+      <|im_start|>assistant
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "\n</tool_call>"
+  - "\n\n\n"
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 6b882768..4582838e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,503 +1,713 @@
+## LLM
+
+### START LLAMA3
+- &llama3
+  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
+  name: "llama3-8b-instruct"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+    files:
+    - filename: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+      sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787
+      uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+### START LLaVa
+- &llava
+  url: "github:mudler/LocalAI/gallery/llava.yaml@master"
+  name: "llava-1.6-vicuna"
+  overrides:
+    mmproj: mmproj-vicuna7b-f16.gguf
+    parameters:
+      model: vicuna-7b-q5_k.gguf
+    files:
+    - filename: vicuna-7b-q5_k.gguf
+      uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
+    - filename: mmproj-vicuna7b-f16.gguf
+      uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
+- <<: *llava
+  name: "llava-1.6-mistral"
+  overrides:
+    mmproj: llava-v1.6-7b-mmproj-f16.gguf
+    parameters:
+      model: llava-v1.6-mistral-7b.gguf
+    files:
+    - filename: llava-v1.6-mistral-7b.gguf
+      sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595
+      uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
+    - filename: llava-v1.6-7b-mmproj-f16.gguf
+      sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16
+      uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+- <<: *llava
+  name: "llava-1.5"
+  overrides:
+    mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
+    parameters:
+      model: llava-v1.5-7b-Q4_K.gguf
+    files:
+    - filename: llava-v1.5-7b-Q4_K.gguf
+      sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9
+      uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
+    - filename: llava-v1.5-7b-mmproj-Q8_0.gguf
+      sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a
+      uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
+### START Phi-2
+- &phi-2
+  url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
+  name: "phi-2-chat:Q8_0"
+  overrides:
+    parameters:
+      model: phi-2-layla-v1-chatml-Q8_0.gguf
+    files:
+    - filename: "phi-2-layla-v1-chatml-Q8_0.gguf"
+      sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0"
+      uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf"
+- <<: *phi-2
+  name: "phi-2-chat"
+  overrides:
+    parameters:
+      model: phi-2-layla-v1-chatml-Q4_K.gguf
+    files:
+    - filename: "phi-2-layla-v1-chatml-Q4_K.gguf"
+      sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48"
+      uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf"
+- <<: *phi-2
+  name: "phi-2-orange"
+  overrides:
+    parameters:
+      model: phi-2-orange.Q4_0.gguf
+    files:
+    - filename: "phi-2-orange.Q4_0.gguf"
+      sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
+      uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
+### START Phi-3
+- &phi-3
+  url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
+  name: "phi-3-mini-4k-instruct"
+  overrides:
+    parameters:
+      model: Phi-3-mini-4k-instruct-q4.gguf
+    files:
+    - filename: "Phi-3-mini-4k-instruct-q4.gguf"
+      sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e"
+      uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
+- <<: *phi-3
+  name: "phi-3-mini-4k-instruct:fp16"
+  overrides:
+    parameters:
+      model: Phi-3-mini-4k-instruct-fp16.gguf
+    files:
+    - filename: "Phi-3-mini-4k-instruct-fp16.gguf"
+      sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605"
+      uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf"
+### START Hermes-2-Pro-Mistral
+- &hermes-2-pro-mistral
+  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
+  name: "hermes-2-pro-mistral"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf
+    files:
+    - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+      sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745"
+      uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf"
+- <<: *hermes-2-pro-mistral
+  name: "hermes-2-pro-mistral:Q6_K"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+    files:
+    - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+      sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff"
+      uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf"
+- <<: *hermes-2-pro-mistral
+  name: "hermes-2-pro-mistral:Q8_0"
+  overrides:
+    parameters:
+      model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf
+    files:
+    - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+      sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca"
+      uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+### END Hermes-2-Pro-Mistral
+
+### START Codellama
+- &codellama
+  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+  name: "codellama-7b"
+  overrides:
+    parameters:
+      model: codellama-7b.Q4_0.gguf
+    files:
+    - filename: "codellama-7b.Q4_0.gguf"
+      sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5"
+      uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf"
+
+### START Embeddings
+- &sentencetransformers
+  name: "all-MiniLM-L6-v2"
+  url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master"
+  overrides:
+    parameters:
+      model: all-MiniLM-L6-v2
+
+### START Image generation
+- &diffusers
+  name: dreamshaper
+  url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master"
+  overrides:
+    parameters:
+      model: DreamShaper_8_pruned.safetensors
+    files:
+    - filename: DreamShaper_8_pruned.safetensors
+      uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
+      sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
+
 ## Whisper
 - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
   name: "whisper-1"
-  license: other
+
 ## Bert
embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" - license: other + - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "text-embedding-ada-002" - license: other + ## Stable Diffusion - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master name: stablediffusion - license: other + ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream - license: other + ## Piper TTS -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_ona-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: ca-upc_ona-x-low.onnx files: - filename: voice-ca-upc_ona-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_pau-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: ca-upc_pau-x-low.onnx files: - filename: voice-ca-upc_pau-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-da-nst_talesyntese-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: da-nst_talesyntese-medium.onnx files: - filename: voice-da-nst_talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-eva_k-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-eva_k-x-low.onnx files: - filename: voice-de-eva_k-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-karlsson-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-karlsson-low.onnx files: - filename: voice-de-karlsson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-kerstin-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-kerstin-low.onnx files: - filename: voice-de-kerstin-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz -- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-pavoque-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-pavoque-low.onnx files: - filename: voice-de-pavoque-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-ramona-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: de-ramona-low.onnx files: - filename: voice-de-ramona-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: de-thorsten-low.onnx files: - filename: voice-de-thorsten-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: el-gr-rapunzelina-low.onnx files: - filename: voice-el-gr-rapunzelina-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-gb-alan-low.onnx files: - filename: voice-en-gb-alan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-gb-southern_english files: - filename: voice-en-gb-southern_english_female-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-amy-low.onnx files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-danny-low.onnx files: - filename: voice-en-us-danny-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: 
github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-lessac-low.onnx files: - filename: voice-en-us-lessac-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-lessac-medium.onnx files: - filename: voice-en-us-lessac-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-libritts-high.onnx files: - filename: voice-en-us-libritts-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-high.onnx files: - filename: voice-en-us-ryan-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-low.onnx files: - filename: voice-en-us-ryan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: en-us-ryan-medium.onnx files: - filename: voice-en-us-ryan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us_lessac - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: en-us-lessac.onnx files: - filename: voice-en-us_lessac.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-carlfm-x-low - license: other 
- urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + override: + parameters: + model: es-carlfm-x-low.onnx files: - filename: voice-es-carlfm-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: es-mls_10246-low.onnx files: - filename: voice-es-mls_10246-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: es-mls_9972-low.onnx files: - filename: voice-es-mls_9972-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fi-harri-low.onnx files: - filename: voice-fi-harri-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-gilles-low.onnx files: - filename: voice-fr-gilles-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-mls_1840-low.onnx files: - filename: voice-fr-mls_1840-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-siwis-low.onnx files: - filename: voice-fr-siwis-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: fr-siwis-medium.onnx files: - filename: voice-fr-siwis-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-bui-medium.onnx files: - filename: 
voice-is-bui-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-salka-medium.onnx files: - filename: voice-is-salka-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-steinn-medium.onnx files: - filename: voice-is-steinn-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: is-ugla-medium.onnx files: - filename: voice-is-ugla-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: it-riccardo_fasol-x-low.onnx files: - filename: voice-it-riccardo_fasol-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-iseke-x-low.onnx files: - filename: voice-kk-iseke-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-issai-high.onnx files: - filename: voice-kk-issai-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: kk-raya-x-low.onnx files: - filename: voice-kk-raya-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ne-google-medium.onnx files: - filename: voice-ne-google-medium.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ne-google-x-low.onnx files: - filename: voice-ne-google-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-mls_5809-low.onnx files: - filename: voice-nl-mls_5809-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-mls_7432-low.onnx files: - filename: voice-nl-mls_7432-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-nathalie-x-low.onnx files: - filename: voice-nl-nathalie-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-rdh-medium.onnx files: - filename: voice-nl-rdh-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: nl-rdh-x-low.onnx files: - filename: voice-nl-rdh-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: no-talesyntese-medium.onnx files: - filename: voice-no-talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: pl-mls_6892-low.onnx files: - filename: voice-pl-mls_6892-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz -- url: 
github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: pt-br-edresson-low.onnx files: - filename: voice-pt-br-edresson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: ru-irinia-medium.onnx files: - filename: voice-ru-irinia-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: sv-se-nst-medium.onnx files: - filename: voice-sv-se-nst-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: uk-lada-x-low.onnx files: - filename: voice-uk-lada-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: vi-25hours-single-low.onnx files: - filename: voice-vi-25hours-single-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: vi-vivos-x-low.onnx files: - filename: voice-vi-vivos-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: zh-cn-huayan-x-low.onnx files: - filename: voice-zh-cn-huayan-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/virtual.yaml@master +- url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium - license: other - urls: - - https://github.com/rhasspy/piper/releases/download/v0.0.2/ + + override: + parameters: + model: zh_CN-huayan-medium.onnx files: - filename: voice-zh_CN-huayan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file diff --git a/gallery/llama3-instruct.yaml 
b/gallery/llama3-instruct.yaml new file mode 100644 index 00000000..4e29e740 --- /dev/null +++ b/gallery/llama3-instruct.yaml @@ -0,0 +1,64 @@ +name: "llama3-instruct" +license: llama3 + +description: | + Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. + + Model developers Meta + + Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. + + Input Models input text only. + + Output Models generate text and code only. + + Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. +urls: +- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + +tags: +- llm +- gguf +- gpu +- cpu + +config_file: | + mmap: true + template: + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + function: | + <|start_header_id|>system<|end_header_id|> + + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> + Function call: + chat: | + <|begin_of_text|>{{.Input }} + <|start_header_id|>assistant<|end_header_id|> + completion: | + {{.Input}} + context_size: 8192 + f16: true + stopwords: + - <|im_end|> + - + - "<|eot_id|>" diff --git a/gallery/llava.yaml b/gallery/llava.yaml new file mode 100644 index 00000000..159ae34c --- /dev/null +++ b/gallery/llava.yaml @@ -0,0 +1,32 @@ +name: "llava" +license: apache-2.0 + +description: | + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. 
+
+urls:
+- https://llava-vl.github.io/
+
+tags:
+- llm
+- multimodal
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  backend: llama-cpp
+  context_size: 4096
+  f16: true
+
+  mmap: true
+  roles:
+    user: "USER:"
+    assistant: "ASSISTANT:"
+    system: "SYSTEM:"
+
+  template:
+    chat: |
+      A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+      {{.Input}}
+      ASSISTANT:
diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml
new file mode 100644
index 00000000..3370311f
--- /dev/null
+++ b/gallery/phi-2-chat.yaml
@@ -0,0 +1,50 @@
+name: "phi-2-chatml"
+license: mit
+
+description: |
+  Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation.
+
+  The dataset has been pre-processed by doing the following:
+
+  - remove all refusals
+  - remove any mention of AI assistant
+  - split any multi-turn dialog generated in the dataset into multi-turn conversation records
+  - added nsfw generated conversations from the Teatime dataset
+
+  Developed by: l3utterfly
+  Funded by: Layla Network
+  Model type: Phi
+  Language(s) (NLP): English
+  License: MIT
+  Finetuned from model: Phi-2
+
+
+urls:
+- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml
+- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  mmap: true
+  # parameters:
+  #   model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+
+  template:
+    chat_message: |
+      <|im_start|>{{ .RoleName }}
+      {{.Content}}<|im_end|>
+    chat: |
+      {{.Input}}
+      <|im_start|>assistant
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - <|im_end|>
+
diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml
new file mode 100644
index 00000000..9800f8da
--- /dev/null
+++ b/gallery/phi-2-orange.yaml
@@ -0,0 +1,33 @@
+name: "phi-2-orange"
+license: mit
+icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg"
+description: |
+  A two-step finetune of Phi-2, with a bit of zest.
+
+  There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test.
+urls:
+- https://huggingface.co/rhysjones/phi-2-orange
+- https://huggingface.co/TheBloke/phi-2-orange-GGUF
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  mmap: true
+  template:
+    chat_message: |
+      <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+      {{if .Content}}{{.Content}}{{end}}<|im_end|>
+    chat: |
+      {{.Input}}
+      <|im_start|>assistant
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - </s>
diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml
new file mode 100644
index 00000000..24dbc20f
--- /dev/null
+++ b/gallery/phi-3-chat.yaml
@@ -0,0 +1,31 @@
+name: "phi-3-chat"
+license: mit
+
+description: |
+  The Phi-3-Mini-4K-Instruct is a 3.8B-parameter, lightweight, state-of-the-art open model trained with the Phi-3 datasets, which include both synthetic data and filtered publicly available website data, with a focus on high-quality and reasoning-dense properties. The model belongs to the Phi-3 family; the Mini version comes in two variants, 4K and 128K, which is the context length (in tokens) it can support. The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased robust, state-of-the-art performance among models with fewer than 13 billion parameters.
+
+urls:
+- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
+
+tags:
+- llm
+- gguf
+- gpu
+- cpu
+
+config_file: |
+  mmap: true
+  template:
+    chat_message: |
+      <|{{ .RoleName }}|>
+      {{.Content}}<|end|>
+    chat: |
+      {{.Input}}
+      <|assistant|>
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - <|end|>
+
diff --git a/gallery/piper.yaml b/gallery/piper.yaml
new file mode 100644
index 00000000..d759ba92
--- /dev/null
+++ b/gallery/piper.yaml
@@ -0,0 +1,15 @@
+config_file: |
+  backend: piper
+icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
+license: mit
+
+urls:
+ - https://github.com/rhasspy/piper
+
+description: |
+  A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper).
+
+tags:
+- tts
+- text-to-speech
+- cpu
diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml
new file mode 100644
index 00000000..1830cce3
--- /dev/null
+++ b/gallery/sentencetransformers.yaml
@@ -0,0 +1,12 @@
+name: "sentencetransformers"
+description: |
+  This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can efficiently be found using cosine similarity.
+ urls: + - https://github.com/UKPLab/sentence-transformers +tags: +- gpu +- cpu +- embeddings + +config_file: | + backend: sentencetransformers \ No newline at end of file From 55778b35fff7909927e7699a8232eceec0f5c340 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 19:47:42 +0200 Subject: [PATCH 0106/2648] fix(gallery): move metadata where it belongs Signed-off-by: Ettore Di Giacinto --- gallery/bert-embeddings.yaml | 6 +- gallery/codellama.yaml | 14 -- gallery/dreamshaper.yaml | 13 -- gallery/hermes-2-pro-mistral.yaml | 21 -- gallery/index.yaml | 375 ++++++++++++++++++++++++------ gallery/llama3-instruct.yaml | 21 -- gallery/llava.yaml | 13 -- gallery/phi-2-chat.yaml | 31 --- gallery/phi-2-orange.yaml | 15 -- gallery/phi-3-chat.yaml | 13 -- gallery/piper.yaml | 13 -- gallery/sentencetransformers.yaml | 8 - gallery/stablediffusion.yaml | 6 - gallery/tinydream.yaml | 6 - gallery/whisper-base.yaml | 6 - 15 files changed, 310 insertions(+), 251 deletions(-) diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml index 0798bf54..01f05f33 100644 --- a/gallery/bert-embeddings.yaml +++ b/gallery/bert-embeddings.yaml @@ -1,9 +1,5 @@ name: "bert-embeddings" -license: "Apache 2.0" -urls: -- https://huggingface.co/skeskinen/ggml -description: | - Bert model that can be used for embeddings + config_file: | parameters: model: bert-MiniLM-L6-v2q4_0.bin diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml index 1b773ed6..a4c3233f 100644 --- a/gallery/codellama.yaml +++ b/gallery/codellama.yaml @@ -1,18 +1,4 @@ name: "codellama" -license: llama2 - -description: | - Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. - -urls: -- https://huggingface.co/TheBloke/CodeLlama-7B-GGUF -- https://huggingface.co/meta-llama/CodeLlama-7b-hf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | backend: llama-cpp diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml index 894ae0cf..219a1e53 100644 --- a/gallery/dreamshaper.yaml +++ b/gallery/dreamshaper.yaml @@ -1,18 +1,5 @@ name: "dreamshaper" -icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png -license: other -description: | - A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. - -urls: -- https://civitai.com/models/4384/dreamshaper - -tags: -- text-to-image -- stablediffusion -- sd-1.5 -- gpu config_file: | backend: diffusers diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index 5a79d5cb..d4771a11 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -1,26 +1,5 @@ name: "hermes-2-pro-mistral" -icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png -license: apache-2.0 -description: | - Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. 
- - This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation. - - Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below. - - This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI - - Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main - -urls: -- https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/index.yaml b/gallery/index.yaml index 4582838e..bb1c5250 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4,6 +4,28 @@ - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" name: "llama3-8b-instruct" + license: llama3 + + description: | + Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. + + Model developers Meta + + Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. + + Input Models input text only. + + Output Models generate text and code only. + + Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. + urls: + - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf @@ -15,6 +37,20 @@ ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" + license: apache-2.0 + + description: | + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. + + urls: + - https://llava-vl.github.io/ + + tags: + - llm + - multimodal + - gguf + - gpu + - cpu name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf @@ -52,8 +88,36 @@ sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf ### START Phi-2 -- &phi-2 +- &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" + license: mit + + description: | + Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation. 
+ + The dataset has been pre-processed by doing the following: + + - remove all refusals + - remove any mention of AI assistant + - split any multi-turn dialog generated in the dataset into multi-turn conversations records + - added nfsw generated conversations from the Teatime dataset + + Developed by: l3utterfly + Funded by: Layla Network + Model type: Phi + Language(s) (NLP): English + License: MIT + Finetuned from model: Phi-2 + + urls: + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf + + tags: + - llm + - gguf + - gpu + - cpu name: "phi-2-chat:Q8_0" overrides: parameters: @@ -62,7 +126,7 @@ - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" -- <<: *phi-2 +- <<: *phi-2-chat name: "phi-2-chat" overrides: parameters: @@ -71,7 +135,22 @@ - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" -- <<: *phi-2 +- <<: *phi-2-chat + license: mit + icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" + description: | + A two-step finetune of Phi-2, with a bit of zest. + + There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. + urls: + - https://huggingface.co/rhysjones/phi-2-orange + - https://huggingface.co/TheBloke/phi-2-orange-GGUF + + tags: + - llm + - gguf + - gpu + - cpu name: "phi-2-orange" overrides: parameters: @@ -84,6 +163,19 @@ - &phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" + license: mit + + description: | + The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. + + urls: + - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Phi-3-mini-4k-instruct-q4.gguf @@ -104,6 +196,28 @@ - &hermes-2-pro-mistral url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" + icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + license: apache-2.0 + + description: | + Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. 
+ + This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling, JSON Structured Outputs, and has improved on several other metrics as well, scoring a 90% on our function calling evaluation built in partnership with Fireworks.AI, and an 81% on our structured JSON Output evaluation. + + Hermes Pro takes advantage of a special system prompt and multi-turn function calling structure with a new chatml role in order to make function calling reliable and easy to parse. Learn more about prompting below. + + This work was a collaboration between Nous Research, @interstellarninja, and Fireworks.AI + + Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main + + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf @@ -135,6 +249,20 @@ - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" + license: llama2 + + description: | + Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. + + urls: + - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF + - https://huggingface.co/meta-llama/CodeLlama-7b-hf + + tags: + - llm + - gguf + - gpu + - cpu overrides: parameters: model: codellama-7b.Q4_0.gguf @@ -145,6 +273,14 @@ ### START Embeddings - &sentencentransformers + description: | + This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. + urls: + - https://github.com/UKPLab/sentence-transformers + tags: + - gpu + - cpu + - embeddings name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -152,8 +288,22 @@ model: all-MiniLM-L6-v2 ### START Image generation -- &diffusers +- &dreamshaper name: dreamshaper + icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + license: other + + description: | + A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. 
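As a usage sketch for the dreamshaper entry above: LocalAI exposes an OpenAI-compatible images endpoint, so once the model is installed a request along these lines should produce an image. The host, port, size and the exact response fields are assumptions for a default local install, not taken from this patch.

import requests

resp = requests.post(
    "http://localhost:8080/v1/images/generations",
    json={
        "model": "dreamshaper",  # the gallery name configured above
        "prompt": "portrait of a cyborg, intricate, highly detailed",
        "size": "512x512",       # assumed; pick a size the backend supports
    },
    timeout=600,  # diffusion without a GPU can take a while
)
resp.raise_for_status()
# The generated image typically arrives as a URL or base64 payload
# under the "data" key of the response body.
print(resp.json())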
+ + urls: + - https://civitai.com/models/4384/dreamshaper + + tags: + - text-to-image + - stablediffusion + - sd-1.5 + - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" overrides: parameters: @@ -166,32 +316,71 @@ ## Whisper - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" + license: "MIT" + urls: + - https://github.com/ggerganov/whisper.cpp + - https://huggingface.co/ggerganov/whisper.cpp + + description: | + Port of OpenAI's Whisper model in C/C++ ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" - -- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" - name: "text-embedding-ada-002" - + license: "Apache 2.0" + urls: + - https://huggingface.co/skeskinen/ggml + tags: + - embeddings + description: | + Bert model that can be used for embeddings + ## Stable Diffusion - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master - name: stablediffusion + license: "BSD-3" + urls: + - https://github.com/EdVince/Stable-Diffusion-NCNN + - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + + description: | + Stable Diffusion in NCNN with c++, supported txt2img and img2img + name: stablediffusion-cpp ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream - + license: "BSD-3" + urls: + - https://github.com/symisc/tiny-dream + - https://github.com/symisc/tiny-dream/blob/main/LICENSE + + description: | + An embedded, Header Only, Stable Diffusion C++ implementation ## Piper TTS -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- &piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low + icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png + license: mit + + urls: + - https://github.com/rhasspy/piper + + description: | + A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
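The piper entries that follow are plain TTS voices, so a short usage sketch may be clearer than prose: LocalAI's TTS endpoint takes a voice model name plus the text and replies with audio bytes. The /tts path and the field names are assumptions based on LocalAI's TTS API rather than this patch; the model filename comes from the kathleen entry's parameters below. Note also the design choice in this hunk: the entry is anchored as &piper so the dozens of voice entries that follow can inherit the shared metadata via <<: *piper merge keys instead of repeating it.

import requests

resp = requests.post(
    "http://localhost:8080/tts",
    json={
        "model": "en-us-kathleen-low.onnx",  # parameters.model of the entry below
        "input": "Hello from a local text to speech pipeline.",
    },
    timeout=120,
)
resp.raise_for_status()

# The endpoint replies with the rendered audio file itself.
with open("out.wav", "wb") as f:
    f.write(resp.content)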
+ + tags: + - tts + - text-to-speech + - cpu + override: parameters: model: en-us-kathleen-low.onnx files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper name: voice-ca-upc_ona-x-low override: parameters: @@ -199,7 +388,8 @@ files: - filename: voice-ca-upc_ona-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_ona-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ca-upc_pau-x-low override: parameters: @@ -207,7 +397,8 @@ files: - filename: voice-ca-upc_pau-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ca-upc_pau-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-da-nst_talesyntese-medium override: parameters: @@ -215,7 +406,8 @@ files: - filename: voice-da-nst_talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-da-nst_talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-eva_k-x-low override: parameters: @@ -223,7 +415,8 @@ files: - filename: voice-de-eva_k-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-eva_k-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-karlsson-low override: parameters: @@ -231,7 +424,8 @@ files: - filename: voice-de-karlsson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-karlsson-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-kerstin-low override: parameters: @@ -239,7 +433,8 @@ files: - filename: voice-de-kerstin-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-kerstin-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-pavoque-low override: parameters: @@ -247,7 +442,8 @@ files: - filename: voice-de-pavoque-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-pavoque-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-ramona-low override: parameters: @@ -255,7 +451,8 @@ files: - filename: voice-de-ramona-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-ramona-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low override: @@ -264,7 +461,8 @@ files: - filename: voice-de-thorsten-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-de-thorsten-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low override: @@ -273,7 +471,8 @@ files: - filename: voice-el-gr-rapunzelina-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-el-gr-rapunzelina-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low override: @@ -282,7 +481,8 @@ files: - filename: voice-en-gb-alan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-alan-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low override: @@ -291,7 +491,8 @@ files: - filename: voice-en-gb-southern_english_female-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-gb-southern_english_female-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low override: @@ -300,7 +501,8 @@ files: - filename: voice-en-us-amy-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low override: @@ -309,7 +511,8 @@ files: - filename: voice-en-us-danny-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low override: @@ -318,7 +521,8 @@ files: - filename: voice-en-us-kathleen-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-kathleen-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low override: @@ -327,7 +531,8 @@ files: - filename: voice-en-us-lessac-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium override: @@ -336,7 +541,8 @@ files: - filename: voice-en-us-lessac-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-lessac-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high override: @@ -345,7 +551,8 @@ files: - filename: voice-en-us-libritts-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-libritts-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high override: @@ -354,7 +561,8 @@ files: - filename: voice-en-us-ryan-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low override: @@ -364,7 +572,8 @@ - filename: voice-en-us-ryan-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium 
override: @@ -374,7 +583,8 @@ - filename: voice-en-us-ryan-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-ryan-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us_lessac override: parameters: @@ -383,7 +593,8 @@ - filename: voice-en-us_lessac.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us_lessac.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-carlfm-x-low override: parameters: @@ -392,7 +603,8 @@ - filename: voice-es-carlfm-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-carlfm-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low override: @@ -402,7 +614,8 @@ - filename: voice-es-mls_10246-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_10246-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low override: @@ -412,7 +625,8 @@ - filename: voice-es-mls_9972-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-es-mls_9972-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low override: @@ -422,7 +636,8 @@ - filename: voice-fi-harri-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fi-harri-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low override: @@ -432,7 +647,8 @@ - filename: voice-fr-gilles-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-gilles-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low override: @@ -442,7 +658,8 @@ - filename: voice-fr-mls_1840-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-mls_1840-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low override: @@ -452,7 +669,8 @@ - filename: voice-fr-siwis-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium override: @@ -462,7 +680,8 @@ - filename: voice-fr-siwis-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-fr-siwis-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium override: @@ -472,7 +691,8 @@ - filename: voice-is-bui-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-bui-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium override: @@ -482,7 +702,8 @@ - filename: voice-is-salka-medium.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-salka-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium override: @@ -492,7 +713,8 @@ - filename: voice-is-steinn-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-steinn-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium override: @@ -502,7 +724,8 @@ - filename: voice-is-ugla-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-is-ugla-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low override: @@ -512,7 +735,8 @@ - filename: voice-it-riccardo_fasol-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low override: @@ -522,7 +746,8 @@ - filename: voice-kk-iseke-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-iseke-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high override: @@ -532,7 +757,8 @@ - filename: voice-kk-issai-high.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-issai-high.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low override: @@ -542,7 +768,8 @@ - filename: voice-kk-raya-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-kk-raya-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium override: @@ -552,7 +779,8 @@ - filename: voice-ne-google-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low override: @@ -562,7 +790,8 @@ - filename: voice-ne-google-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ne-google-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low override: @@ -572,7 +801,8 @@ - filename: voice-nl-mls_5809-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_5809-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low override: @@ -582,7 +812,8 @@ - filename: voice-nl-mls_7432-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-mls_7432-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low override: @@ -592,7 +823,8 @@ - filename: voice-nl-nathalie-x-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-nathalie-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium override: @@ -602,7 +834,8 @@ - filename: voice-nl-rdh-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low override: @@ -612,7 +845,8 @@ - filename: voice-nl-rdh-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-nl-rdh-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium override: @@ -622,7 +856,8 @@ - filename: voice-no-talesyntese-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-no-talesyntese-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low override: @@ -632,7 +867,8 @@ - filename: voice-pl-mls_6892-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pl-mls_6892-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low override: @@ -642,7 +878,8 @@ - filename: voice-pt-br-edresson-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-pt-br-edresson-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium override: @@ -652,7 +889,8 @@ - filename: voice-ru-irinia-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-ru-irinia-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium override: @@ -662,7 +900,8 @@ - filename: voice-sv-se-nst-medium.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-sv-se-nst-medium.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low override: @@ -672,7 +911,8 @@ - filename: voice-uk-lada-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-uk-lada-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low override: @@ -682,7 +922,8 @@ - filename: voice-vi-25hours-single-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-25hours-single-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low override: @@ -692,7 +933,8 @@ - filename: voice-vi-vivos-x-low.tar.gz uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-vi-vivos-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low override: @@ -702,7 +944,8 @@ - filename: voice-zh-cn-huayan-x-low.tar.gz uri: 
https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh-cn-huayan-x-low.tar.gz -- url: github:mudler/LocalAI/gallery/piper.yaml@master +- <<: *piper + url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium override: diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 4e29e740..96272c58 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -1,26 +1,5 @@ name: "llama3-instruct" -license: llama3 -description: | - Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety. - - Model developers Meta - - Variations Llama 3 comes in two sizes — 8B and 70B parameters — in pre-trained and instruction tuned variants. - - Input Models input text only. - - Output Models generate text and code only. - - Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. -urls: -- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/llava.yaml b/gallery/llava.yaml index 159ae34c..44c1aa97 100644 --- a/gallery/llava.yaml +++ b/gallery/llava.yaml @@ -1,18 +1,5 @@ name: "llava" -license: apache-2.0 -description: | - LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. - -urls: -- https://llava-vl.github.io/ - -tags: -- llm -- multimodal -- gguf -- gpu -- cpu config_file: | backend: llama-cpp diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml index 3370311f..3fc84d3b 100644 --- a/gallery/phi-2-chat.yaml +++ b/gallery/phi-2-chat.yaml @@ -1,39 +1,8 @@ name: "phi-2-chatml" -license: mit -description: | - Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character impersonation. 
- - The dataset has been pre-processed by doing the following: - - - remove all refusals - - remove any mention of AI assistant - - split any multi-turn dialog generated in the dataset into multi-turn conversations records - - added nfsw generated conversations from the Teatime dataset - - Developed by: l3utterfly - Funded by: Layla Network - Model type: Phi - Language(s) (NLP): English - License: MIT - Finetuned from model: Phi-2 - - -urls: -- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml -- https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true - # parameters: - # model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf - template: chat_message: | <|im_start|>{{ .RoleName }} diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml index 9800f8da..645875ad 100644 --- a/gallery/phi-2-orange.yaml +++ b/gallery/phi-2-orange.yaml @@ -1,19 +1,4 @@ name: "phi-2-orange" -license: mit -icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" -description: | - A two-step finetune of Phi-2, with a bit of zest. - - There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. -urls: -- https://huggingface.co/rhysjones/phi-2-orange -- https://huggingface.co/TheBloke/phi-2-orange-GGUF - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index 24dbc20f..b17e5bb4 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -1,17 +1,4 @@ name: "phi-3-chat" -license: mit - -description: | - The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. - -urls: -- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf - -tags: -- llm -- gguf -- gpu -- cpu config_file: | mmap: true diff --git a/gallery/piper.yaml b/gallery/piper.yaml index d759ba92..eb1a6ecc 100644 --- a/gallery/piper.yaml +++ b/gallery/piper.yaml @@ -1,15 +1,2 @@ config_file: | backend: piper -icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png -license: mit - -urls: - - https://github.com/rhasspy/piper - -description: | - A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
- -tags: -- tts -- text-to-speech -- cpu diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml index 1830cce3..9ba5d29b 100644 --- a/gallery/sentencetransformers.yaml +++ b/gallery/sentencetransformers.yaml @@ -1,12 +1,4 @@ name: "sentencetransformers" -description: | - This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. - urls: - - https://github.com/UKPLab/sentence-transformers -tags: -- gpu -- cpu -- embeddings config_file: | backend: sentencetransformers \ No newline at end of file diff --git a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml index c8a0eb8b..9b1cad32 100644 --- a/gallery/stablediffusion.yaml +++ b/gallery/stablediffusion.yaml @@ -1,11 +1,5 @@ name: "stablediffusion-cpp" -license: "BSD-3" -urls: -- https://github.com/EdVince/Stable-Diffusion-NCNN -- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE -description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img config_file: | name: stablediffusion-cpp backend: stablediffusion diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml index 415762de..6e39414c 100644 --- a/gallery/tinydream.yaml +++ b/gallery/tinydream.yaml @@ -1,11 +1,5 @@ name: "tinydream" -license: "BSD-3" -urls: - - https://github.com/symisc/tiny-dream - - https://github.com/symisc/tiny-dream/blob/main/LICENSE -description: | - An embedded, Header Only, Stable Diffusion C++ implementation config_file: | name: tinydream backend: tinydream diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index 574dbb13..f654a37c 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -1,11 +1,5 @@ name: "whisper-base" -license: "MIT" -urls: -- https://github.com/ggerganov/whisper.cpp -- https://huggingface.co/ggerganov/whisper.cpp -description: | - Port of OpenAI's Whisper model in C/C++ config_file: | backend: whisper From a09fe1b9babcfec6e91f596b6597a4030d9552fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:00:20 +0200 Subject: [PATCH 0107/2648] fix(gallery): set margin for images Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 405f42ae..f2b4f8dd 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -209,7 +209,7 @@ func ListModels(models []*gallery.GalleryModel) string { }, elem.Img(attrs.Props{ // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", "src": m.Icon, }), ), From d2bea6f9e3c30056b5d1adcfc6dd3ff5fae560af Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:01:56 +0200 Subject: [PATCH 0108/2648] fix(gallery): fixup hermes q8 entry Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index bb1c5250..16916703 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -235,7 +235,7 @@ sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" uri: 
"huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - <<: *hermes-2-pro-mistral - name: "hermes-2-pro-mistral" + name: "hermes-2-pro-mistral:Q8_0" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf From 34c3f563fd4c50162dc4e64eb4cd9265ac4afb05 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:05:59 +0200 Subject: [PATCH 0109/2648] fix(gallery): fixup dreamshaper icon Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 16916703..deab29cf 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -290,7 +290,7 @@ ### START Image generation - &dreamshaper name: dreamshaper - icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png + icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other description: | From ac56ac2b2da3bba78122b2e80eb36afc28e51056 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Apr 2024 20:10:58 +0200 Subject: [PATCH 0110/2648] fix(gallery): show a fake image if no there is no icon (#2111) Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 40 ++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index f2b4f8dd..c03750da 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -8,6 +8,10 @@ import ( "github.com/go-skynet/LocalAI/pkg/gallery" ) +const ( + NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" +) + func DoneProgress(uid string) string { return elem.Div( attrs.Props{}, @@ -197,25 +201,27 @@ func ListModels(models []*gallery.GalleryModel) string { elems := []elem.Node{} - if m.Icon != "" { - elems = append(elems, - - elem.Div(attrs.Props{ - "class": "flex justify-center items-center", - }, - elem.A(attrs.Props{ - "href": "#!", - // "class": "justify-center items-center", - }, - elem.Img(attrs.Props{ - // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", - "src": m.Icon, - }), - ), - )) + if m.Icon == "" { + m.Icon = NoImage } + elems = append(elems, + + elem.Div(attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.A(attrs.Props{ + "href": "#!", + // "class": "justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "src": m.Icon, + }), + ), + )) + elems = append(elems, descriptionDiv(m), actionDiv(m)) modelsElements = append(modelsElements, elem.Div( From f718a391c03c1b1ac870e9a083ca686613bac48f Mon Sep 17 00:00:00 2001 From: fakezeta Date: Wed, 24 Apr 2024 02:45:37 +0200 Subject: [PATCH 0111/2648] fix missing TrustRemoteCode in OpenVINO model load (#2114) --- backend/python/transformers/transformers_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 90053ed5..2f4140c2 100755 --- a/backend/python/transformers/transformers_server.py +++ 
b/backend/python/transformers/transformers_server.py @@ -149,6 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device_map="CPU" self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, + trust_remote_code=request.TrustRemoteCode, ov_config={"PERFORMANCE_HINT": "LATENCY"}, device=device_map) self.OV = True From 2fb34b00b5c5daa1b60c46a5b535d30c5acf35fc Mon Sep 17 00:00:00 2001 From: jtwolfe Date: Wed, 24 Apr 2024 17:17:49 +1000 Subject: [PATCH 0112/2648] Incl ocv pkg for diffsusers utils (#2115) * Update diffusers.yml Signed-off-by: jtwolfe * Update diffusers-rocm.yml Signed-off-by: jtwolfe --------- Signed-off-by: jtwolfe --- backend/python/diffusers/diffusers-rocm.yml | 1 + backend/python/diffusers/diffusers.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml index f261701d..97b2ce0f 100644 --- a/backend/python/diffusers/diffusers-rocm.yml +++ b/backend/python/diffusers/diffusers-rocm.yml @@ -61,4 +61,5 @@ dependencies: - urllib3==2.0.6 - zipp==3.17.0 - torch + - opencv-python prefix: /opt/conda/envs/diffusers diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml index b1a7d9f9..d5d2913e 100644 --- a/backend/python/diffusers/diffusers.yml +++ b/backend/python/diffusers/diffusers.yml @@ -71,4 +71,5 @@ dependencies: - typing-extensions==4.8.0 - urllib3==2.0.6 - zipp==3.17.0 + - opencv-python prefix: /opt/conda/envs/diffusers From d65214a234d703812dec346be9fb929579382a6b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:11:41 +0200 Subject: [PATCH 0113/2648] :arrow_up: Update docs version mudler/LocalAI (#2113) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 55eebaeb..6a618115 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "null" + "version": "v2.12.4" } From 4fffc47e77db8a56bbc89fcac57e6c2ca369789e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 18:44:04 +0200 Subject: [PATCH 0114/2648] deps(llama.cpp): update, use better model for function call tests (#2119) deps(llama.cpp): update Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/http/app_test.go | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 761c76d6..1923f956 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b8109bc0139f15a5b321909f47510b89dca47ffc +CPPLLAMA_VERSION?=4e96a812b3ce7322a29a3008db2ed73d9087b176 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/core/http/app_test.go b/core/http/app_test.go index 35e0a8bf..3699c0ed 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -489,11 +489,10 @@ var _ = Describe("API test", func() { if runtime.GOOS != "linux" { Skip("test supported only on linux") } - modelName := "codellama" + + modelName := "hermes-2-pro-mistral" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml", - Name: modelName, - 
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128}, + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -556,7 +555,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) }) From 23eac98b3c4aa62f75fc75ddbaf6a1b81326a22f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 19:43:07 +0200 Subject: [PATCH 0115/2648] docs: update hot topics Signed-off-by: Ettore Di Giacinto --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b32febd..343a7cf5 100644 --- a/README.md +++ b/README.md @@ -50,13 +50,13 @@ [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +- Reranker API: https://github.com/mudler/LocalAI/pull/2121 +- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104 - llama3: https://github.com/mudler/LocalAI/discussions/2076 - Parler-TTS: https://github.com/mudler/LocalAI/pull/2027 -- Landing page: https://github.com/mudler/LocalAI/pull/1922 - Openvino support: https://github.com/mudler/LocalAI/pull/1892 - Vector store: https://github.com/mudler/LocalAI/pull/1795 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 -- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715 Hot topics (looking for contributors): - Backends v2: https://github.com/mudler/LocalAI/issues/1126 From 9dbd217c5972a56563f8a362f82469891349879f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Apr 2024 19:56:51 +0200 Subject: [PATCH 0116/2648] docs(integrations): add Wave terminal Signed-off-by: Ettore Di Giacinto --- docs/content/docs/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index 29f2db17..a7666e77 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -15,6 +15,7 @@ The list below is a list of software that integrates with LocalAI. - [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm) - [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI. - https://plugins.jetbrains.com/plugin/21056-codegpt allows for custom OpenAI compatible endpoints since 2.4.0 +- [Wave Terminal](https://docs.waveterm.dev/features/supportedLLMs/localai) has native support for LocalAI! 
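Circling back to the app_test.go hunk a few commits above: the updated test installs a model by POSTing a config URL to /models/apply and then polls using the returned uuid. A sketch of the same call from Python follows; the config_url JSON field name and the response shape are assumptions inferred from the test, so treat them as illustrative.

import requests

resp = requests.post(
    "http://127.0.0.1:9090/models/apply",  # address used by the test
    json={
        "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml"
    },
)
resp.raise_for_status()
job = resp.json()

# The test asserts the returned uuid is non-empty before polling
# the job status until the model download completes.
print(job["uuid"])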
- https://github.com/longy2k/obsidian-bmo-chatbot - https://github.com/FlowiseAI/Flowise - https://github.com/k8sgpt-ai/k8sgpt From d30280ed23600beb083bc69ca988f7212c1581cd Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Apr 2024 23:55:30 +0200 Subject: [PATCH 0117/2648] :arrow_up: Update ggerganov/whisper.cpp (#2122) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1923f956..c1fe9a48 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028 +WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From e16658b7ec065d9893202cbf15937140eea8119f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 00:00:17 +0200 Subject: [PATCH 0118/2648] :arrow_up: Update ggerganov/llama.cpp (#2123) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c1fe9a48..662e54bd 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4e96a812b3ce7322a29a3008db2ed73d9087b176 +CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b664edde292210d66b5f05c4ac5069d9123d1b38 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 00:19:02 +0200 Subject: [PATCH 0119/2648] feat(rerankers): Add new backend, support jina rerankers API (#2121) Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 31 +++++ Dockerfile | 5 +- Makefile | 13 +- aio/cpu/rerank.yaml | 27 ++++ aio/entrypoint.sh | 2 +- aio/gpu-8g/rerank.yaml | 27 ++++ aio/intel/rerank.yaml | 27 ++++ backend/backend.proto | 24 ++++ .../transformers/transformers-nvidia.yml | 2 + .../transformers/transformers-rocm.yml | 2 + .../common-env/transformers/transformers.yml | 4 +- backend/python/rerankers/Makefile | 27 ++++ backend/python/rerankers/README.md | 5 + backend/python/rerankers/reranker.py | 123 ++++++++++++++++++ backend/python/rerankers/run.sh | 14 ++ backend/python/rerankers/test.sh | 11 ++ backend/python/rerankers/test_reranker.py | 90 +++++++++++++ core/backend/rerank.go | 39 ++++++ core/http/app.go | 1 + core/http/endpoints/jina/rerank.go | 84 ++++++++++++ core/http/routes/jina.go | 19 +++ core/schema/jina.go | 34 +++++ pkg/grpc/backend.go | 2 + pkg/grpc/client.go | 16 +++ pkg/grpc/embed.go | 4 + 25 files changed, 628 insertions(+), 5 deletions(-) create mode 100644 aio/cpu/rerank.yaml create mode 100644 aio/gpu-8g/rerank.yaml create mode 100644 aio/intel/rerank.yaml create mode 100644 backend/python/rerankers/Makefile create mode 100644 backend/python/rerankers/README.md create mode 100755 backend/python/rerankers/reranker.py create mode 100755 backend/python/rerankers/run.sh create mode 100755 backend/python/rerankers/test.sh 
create mode 100755 backend/python/rerankers/test_reranker.py create mode 100644 core/backend/rerank.go create mode 100644 core/http/endpoints/jina/rerank.go create mode 100644 core/http/routes/jina.go create mode 100644 core/schema/jina.go diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index fa45cb3c..f9476d4d 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -74,6 +74,37 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/sentencetransformers make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test + + tests-rerankers: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential ffmpeg + curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + sudo apt-get update && \ + sudo apt-get install -y conda + sudo apt-get install -y ca-certificates cmake curl patch python3-pip + sudo apt-get install -y libopencv-dev + pip install --user grpcio-tools + + sudo rm -rfv /usr/bin/conda || true + + - name: Test rerankers + run: | + export PATH=$PATH:/opt/conda/bin + make --jobs=5 --output-sync=target -C backend/python/rerankers + make --jobs=5 --output-sync=target -C backend/python/rerankers test + tests-diffusers: runs-on: ubuntu-latest steps: diff --git a/Dockerfile b/Dockerfile index 4bc8b35e..4d12cb56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV 
EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" @@ -259,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/sentencetransformers \ ; fi +RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ + make -C backend/python/rerankers \ + ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ make -C backend/python/transformers \ ; fi diff --git a/Makefile b/Makefile index 662e54bd..b017982e 100644 --- a/Makefile +++ b/Makefile @@ -437,10 +437,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -506,6 +506,14 @@ petals-protogen: petals-protogen-clean: $(MAKE) -C backend/python/petals protogen-clean +.PHONY: rerankers-protogen +rerankers-protogen: + $(MAKE) -C backend/python/rerankers protogen + +.PHONY: rerankers-protogen-clean +rerankers-protogen-clean: + $(MAKE) -C backend/python/rerankers protogen-clean + .PHONY: sentencetransformers-protogen sentencetransformers-protogen: $(MAKE) -C backend/python/sentencetransformers protogen @@ -564,6 +572,7 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/vllm $(MAKE) -C backend/python/mamba $(MAKE) -C backend/python/sentencetransformers + $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers $(MAKE) -C 
backend/python/transformers-musicgen $(MAKE) -C backend/python/parler-tts diff --git a/aio/cpu/rerank.yaml b/aio/cpu/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/cpu/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh index 5fd8d9c2..2487e64f 100755 --- a/aio/entrypoint.sh +++ b/aio/entrypoint.sh @@ -129,7 +129,7 @@ detect_gpu detect_gpu_size PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu -export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}" +export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}" check_vars diff --git a/aio/gpu-8g/rerank.yaml b/aio/gpu-8g/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/gpu-8g/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' diff --git a/aio/intel/rerank.yaml b/aio/intel/rerank.yaml new file mode 100644 index 00000000..b84755a8 --- /dev/null +++ b/aio/intel/rerank.yaml @@ -0,0 +1,27 @@ +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +usage: | + You can test this model with curl like this: + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby 
clothes for sensitive skin",
+        "Natural organic skincare range for sensitive skin",
+        "Tech gadgets for smart homes: 2024 edition",
+        "Sustainable gardening tools and compost solutions",
+        "Sensitive skin-friendly facial cleansers and toners",
+        "Organic food wraps and storage solutions",
+        "All-natural pet food for dogs with allergies",
+        "Yoga mats made from recycled materials"
+      ],
+      "top_n": 3
+    }'
diff --git a/backend/backend.proto b/backend/backend.proto
index ec01e4a7..778a96ff 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -23,6 +23,30 @@ service Backend {
   rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
   rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
   rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
+
+  rpc Rerank(RerankRequest) returns (RerankResult) {}
+}
+
+message RerankRequest {
+  string query = 1;
+  repeated string documents = 2;
+  int32 top_n = 3;
+}
+
+message RerankResult {
+  Usage usage = 1;
+  repeated DocumentResult results = 2;
+}
+
+message Usage {
+  int32 total_tokens = 1;
+  int32 prompt_tokens = 2;
+}
+
+message DocumentResult {
+  int32 index = 1;
+  string text = 2;
+  float relevance_score = 3;
 }
 
 message StoresKey {
diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml
index e12b5dbb..16e494c5 100644
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -120,4 +120,6 @@ dependencies:
   - transformers>=4.38.2 # Updated Version
   - transformers_stream_generator==0.0.5
   - xformers==0.0.23.post1
+  - rerankers[transformers]
+  - pydantic
 prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml
index 48fac8bf..cdefcc27 100644
--- a/backend/python/common-env/transformers/transformers-rocm.yml
+++ b/backend/python/common-env/transformers/transformers-rocm.yml
@@ -108,4 +108,6 @@ dependencies:
   - transformers>=4.38.2 # Updated Version
   - transformers_stream_generator==0.0.5
   - xformers==0.0.23.post1
+  - rerankers[transformers]
+  - pydantic
 prefix: /opt/conda/envs/transformers
diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml
index 843b13fa..5c069dd0 100644
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -111,5 +111,7 @@ dependencies:
   - vllm>=0.4.0
   - transformers>=4.38.2 # Updated Version
   - transformers_stream_generator==0.0.5
-  - xformers==0.0.23.post1
+  - xformers==0.0.23.post1
+  - rerankers[transformers]
+  - pydantic
 prefix: /opt/conda/envs/transformers
diff --git a/backend/python/rerankers/Makefile b/backend/python/rerankers/Makefile
new file mode 100644
index 00000000..f029c841
--- /dev/null
+++ b/backend/python/rerankers/Makefile
@@ -0,0 +1,27 @@
+.PHONY: rerankers
+rerankers: protogen
+	$(MAKE) -C ../common-env/transformers
+
+
+.PHONY: run
+run: protogen
+	@echo "Running rerankers..."
+	bash run.sh
+	@echo "rerankers run."
+
+# This does not work well from the command line; it only works from an IDE like VSCode.
+.PHONY: test
+test: protogen
+	@echo "Testing rerankers..."
+	bash test.sh
+	@echo "rerankers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/rerankers/README.md b/backend/python/rerankers/README.md
new file mode 100644
index 00000000..9e73ba0a
--- /dev/null
+++ b/backend/python/rerankers/README.md
@@ -0,0 +1,5 @@
+# Creating a separate environment for the reranker project
+
+```
+make rerankers
+```
\ No newline at end of file
diff --git a/backend/python/rerankers/reranker.py b/backend/python/rerankers/reranker.py
new file mode 100755
index 00000000..e1974ad5
--- /dev/null
+++ b/backend/python/rerankers/reranker.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for Rerankers models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from rerankers import Reranker
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Rerank.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a model into memory.
+
+        Args:
+            request: A LoadModelRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        try:
+            kwargs = {}
+            if request.Type != "":
+                kwargs['model_type'] = request.Type
+            if request.PipelineType != "": # Reuse the PipelineType field for language
+                kwargs['lang'] = request.PipelineType
+            self.model_name = model_name
+            self.model = Reranker(model_name, **kwargs)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        # The loaded model is kept on the servicer instance and is
+        # reused by subsequent Rerank calls.
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def Rerank(self, request, context):
+        documents = []
+        for doc in request.documents:
+            documents.append(doc)
+        ranked_results = self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
+        # Prepare results to return
+        results = [
+            backend_pb2.DocumentResult(
+                index=res.doc_id,
+                text=res.text,
+                relevance_score=res.score
+            ) for res in ranked_results.results
+        ]
+
+        # Calculate the usage and total tokens
+        # TODO: Implement the usage calculation with reranker
+        total_tokens = sum(len(doc.split()) for doc in request.documents) + len(request.query.split())
+        prompt_tokens = len(request.query.split())
+        usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
+        return backend_pb2.RerankResult(usage=usage, results=results)
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+
+    serve(args.addr)
diff --git a/backend/python/rerankers/run.sh b/backend/python/rerankers/run.sh
new file mode 100755
index 00000000..16d8a0bd
--- /dev/null
+++ b/backend/python/rerankers/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+##
+## A bash script wrapper that runs the reranker server with conda
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate transformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/reranker.py $@
diff --git a/backend/python/rerankers/test.sh b/backend/python/rerankers/test.sh
new file mode 100755
index 00000000..75316829
--- /dev/null
+++ b/backend/python/rerankers/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+##
+## A bash script wrapper that runs the reranker tests with conda
+
+# Activate conda environment
+source activate transformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python -m unittest $DIR/test_reranker.py
\ No newline at end of file
diff --git a/backend/python/rerankers/test_reranker.py b/backend/python/rerankers/test_reranker.py
new file mode 100755
index 00000000..c1cf3d70
--- /dev/null
+++ b/backend/python/rerankers/test_reranker.py
@@ -0,0 +1,90 @@
+"""
+A test script to test the gRPC service
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+    """
+    TestBackendServicer is the class that tests the gRPC service
+    """
+    def setUp(self):
+        """
+        This method sets up the gRPC service by starting the server
+        """
+        self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"])
+        time.sleep(10)
+
+    def tearDown(self) -> None:
+        """
+        This method tears down the gRPC service by terminating the server
+        """
+        self.service.kill()
+        self.service.wait()
+
+    def test_server_startup(self):
+        """
+        This method tests if the server starts up successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            print(err)
+            self.fail("Server failed to start")
+        finally:
+            self.tearDown()
+
+    def test_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    def test_rerank(self):
+        """
+        This method tests if documents are reranked successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                request = backend_pb2.RerankRequest(
+                    query="I love you",
+                    documents=["I hate you", "I really like you"],
+                    top_n=2
+                )
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
+                self.assertTrue(response.success)
+
+                rerank_response = stub.Rerank(request)
+                print(rerank_response.results[0])
+                self.assertIsNotNone(rerank_response.results)
+                
self.assertEqual(len(rerank_response.results), 2) + self.assertEqual(rerank_response.results[0].text, "I really like you") + self.assertEqual(rerank_response.results[1].text, "I hate you") + except Exception as err: + print(err) + self.fail("Reranker service failed") + finally: + self.tearDown() \ No newline at end of file diff --git a/core/backend/rerank.go b/core/backend/rerank.go new file mode 100644 index 00000000..810223aa --- /dev/null +++ b/core/backend/rerank.go @@ -0,0 +1,39 @@ +package backend + +import ( + "context" + "fmt" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" + model "github.com/go-skynet/LocalAI/pkg/model" +) + +func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { + bb := backend + if bb == "" { + return nil, fmt.Errorf("backend is required") + } + + grpcOpts := gRPCModelOpts(backendConfig) + + opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ + model.WithBackendString(bb), + model.WithModel(modelFile), + model.WithContext(appConfig.Context), + model.WithAssetDir(appConfig.AssetsDestination), + model.WithLoadGRPCLoadModelOpts(grpcOpts), + }) + rerankModel, err := loader.BackendLoader(opts...) + if err != nil { + return nil, err + } + + if rerankModel == nil { + return nil, fmt.Errorf("could not load rerank model") + } + + res, err := rerankModel.Rerank(context.Background(), request) + + return res, err +} diff --git a/core/http/app.go b/core/http/app.go index 21652dd9..93eb0e20 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -194,6 +194,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + routes.RegisterJINARoutes(app, cl, ml, appConfig, auth) // Define a custom 404 handler // Note: keep this at the bottom! 
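A minimal Python client sketch for the new Rerank RPC, assuming stubs generated from backend/backend.proto (backend_pb2, backend_pb2_grpc) and a rerankers backend listening on localhost:50051; the address and the "cross-encoder" model name mirror test_reranker.py above and are illustrative only:

import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    # Load a reranker model first; "cross-encoder" matches the aio rerank.yaml defaults.
    load = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
    assert load.success, load.message
    # Rank candidate documents against the query; results come back ordered by relevance.
    reply = stub.Rerank(backend_pb2.RerankRequest(
        query="Organic skincare products for sensitive skin",
        documents=["Eco-friendly kitchenware", "Natural organic skincare range"],
        top_n=2,
    ))
    for r in reply.results:
        print(r.index, r.relevance_score, r.text)

The /v1/rerank HTTP endpoint added below wraps this same RPC, translating the Jina-style JSON request into a RerankRequest and the RerankResult back into the JSON response schema.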
diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go new file mode 100644 index 00000000..bf99367e --- /dev/null +++ b/core/http/endpoints/jina/rerank.go @@ -0,0 +1,84 @@ +package jina + +import ( + "github.com/go-skynet/LocalAI/core/backend" + "github.com/go-skynet/LocalAI/core/config" + + fiberContext "github.com/go-skynet/LocalAI/core/http/ctx" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" +) + +func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + req := new(schema.JINARerankRequest) + if err := c.BodyParser(req); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{ + "error": "Cannot parse JSON", + }) + } + + input := new(schema.TTSRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + if input.Backend != "" { + cfg.Backend = input.Backend + } + + request := &proto.RerankRequest{ + Query: req.Query, + TopN: int32(req.TopN), + Documents: req.Documents, + } + + results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg) + if err != nil { + return err + } + + response := &schema.JINARerankResponse{ + Model: req.Model, + } + + for _, r := range results.Results { + response.Results = append(response.Results, schema.JINADocumentResult{ + Index: int(r.Index), + Document: schema.JINAText{Text: r.Text}, + RelevanceScore: float64(r.RelevanceScore), + }) + } + + response.Usage.TotalTokens = int(results.Usage.TotalTokens) + response.Usage.PromptTokens = int(results.Usage.PromptTokens) + + return c.Status(fiber.StatusOK).JSON(response) + } +} diff --git a/core/http/routes/jina.go b/core/http/routes/jina.go new file mode 100644 index 00000000..9c32c72b --- /dev/null +++ b/core/http/routes/jina.go @@ -0,0 +1,19 @@ +package routes + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/http/endpoints/jina" + + "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" +) + +func RegisterJINARoutes(app *fiber.App, + cl *config.BackendConfigLoader, + ml *model.ModelLoader, + appConfig *config.ApplicationConfig, + auth func(*fiber.Ctx) error) { + + // POST endpoint to mimic the reranking + app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig)) +} diff --git a/core/schema/jina.go b/core/schema/jina.go new file mode 100644 index 00000000..7f80689c --- /dev/null +++ b/core/schema/jina.go @@ -0,0 +1,34 @@ +package schema + +// RerankRequest defines the structure of the request payload +type JINARerankRequest struct { + Model string `json:"model"` + Query string `json:"query"` + 
Documents []string `json:"documents"` + TopN int `json:"top_n"` +} + +// DocumentResult represents a single document result +type JINADocumentResult struct { + Index int `json:"index"` + Document JINAText `json:"document"` + RelevanceScore float64 `json:"relevance_score"` +} + +// Text holds the text of the document +type JINAText struct { + Text string `json:"text"` +} + +// RerankResponse defines the structure of the response payload +type JINARerankResponse struct { + Model string `json:"model"` + Usage JINAUsageInfo `json:"usage"` + Results []JINADocumentResult `json:"results"` +} + +// UsageInfo holds information about usage of tokens +type JINAUsageInfo struct { + TotalTokens int `json:"total_tokens"` + PromptTokens int `json:"prompt_tokens"` +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 8fb8c39d..bef9e186 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -49,4 +49,6 @@ type Backend interface { StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) + + Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) } diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 882db12a..fc4a12fa 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -355,3 +355,19 @@ func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts client := pb.NewBackendClient(conn) return client.StoresFind(ctx, in, opts...) } + +func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) { + if !c.parallel { + c.opMutex.Lock() + defer c.opMutex.Unlock() + } + c.setBusy(true) + defer c.setBusy(false) + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewBackendClient(conn) + return client.Rerank(ctx, in, opts...) 
+} diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 73b185a3..694e83b0 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -101,6 +101,10 @@ func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, return e.s.StoresFind(ctx, in) } +func (e *embedBackend) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) { + return e.s.Rerank(ctx, in) +} + type embedBackendServerStream struct { ctx context.Context fn func(s []byte) From 48d0aa2f6da0b1c039fa062e61facf5e6191420e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 01:28:02 +0200 Subject: [PATCH 0120/2648] models(gallery): add new models to the gallery (#2124) * models: add reranker and parler-tts-mini Signed-off-by: Ettore Di Giacinto * fix: chatml im_end should not have a newline Signed-off-by: Ettore Di Giacinto * models(noromaid): add Signed-off-by: Ettore Di Giacinto * models(llama3): add 70b, add dolphin2.9 Signed-off-by: Ettore Di Giacinto * models(llama3): add unholy-8b Signed-off-by: Ettore Di Giacinto * models(llama3): add therapyllama3, aura Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- aio/cpu/text-to-text.yaml | 6 +- aio/gpu-8g/text-to-text.yaml | 6 +- aio/intel/text-to-text.yaml | 6 +- embedded/models/hermes-2-pro-mistral.yaml | 6 +- gallery/hermes-2-pro-mistral.yaml | 9 +- gallery/index.yaml | 205 +++++++++++++++++++++- gallery/noromaid.yaml | 53 ++++++ gallery/parler-tts.yaml | 2 + gallery/rerankers.yaml | 2 + pkg/model/loader_test.go | 11 +- 10 files changed, 272 insertions(+), 34 deletions(-) create mode 100644 gallery/noromaid.yaml create mode 100644 gallery/parler-tts.yaml create mode 100644 gallery/rerankers.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index cf18f659..f2f6aeb4 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 0407bb22..dc620a13 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index f5f93c14..bd6b87ba 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -22,8 +22,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -38,8 +37,7 @@ template: For each function call return a json object with function name and arguments within 
XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml index dd18ce6f..74d98eeb 100644 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ b/embedded/models/hermes-2-pro-mistral.yaml @@ -21,8 +21,7 @@ template: {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -37,8 +36,7 @@ template: For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index d4771a11..b1dc0ff1 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -3,9 +3,6 @@ name: "hermes-2-pro-mistral" config_file: | mmap: true - parameters: - model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf - template: chat_message: | <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} @@ -24,8 +21,7 @@ config_file: | {{- else if eq .RoleName "tool" }} - {{- end }} - <|im_end|> + {{- end }}<|im_end|> # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling function: | <|im_start|>system @@ -40,8 +36,7 @@ config_file: | For each function call return a json object with function name and arguments within XML tags as follows: {'arguments': , 'name': } - - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant diff --git a/gallery/index.yaml b/gallery/index.yaml index deab29cf..a5de760d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,5 +1,35 @@ -## LLM +### START parler-tts +- &parler-tts + url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" + name: parler-tts-mini-v0.1 + parameters: + model: parler-tts/parler_tts_mini_v0.1 + license: apache-2.0 + description: | + Parler-TTS is a lightweight text-to-speech (TTS) model that can generate high-quality, natural sounding speech in the style of a given speaker (gender, pitch, speaking style, etc). It is a reproduction of work from the paper Natural language guidance of high-fidelity text-to-speech with synthetic annotations by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. + urls: + - https://github.com/huggingface/parler-tts + tags: + - tts + - gpu + - cpu + - text-to-speech + - python +### START rerankers +- &rerankers + url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" + name: cross-encoder + parameters: + model: cross-encoder + license: apache-2.0 + description: | + A cross-encoder model that can be used for reranking + tags: + - reranker + - gpu + - python +## LLMs ### START LLAMA3 - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" @@ -20,20 +50,177 @@ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
  urls:
   - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
-
+  - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
   tags:
   - llm
   - gguf
   - gpu
   - cpu
+  - llama3
   overrides:
     parameters:
       model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
     files:
-    - filename: vicuna-7b-q5_k.gguf
-      sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787
-      uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+    - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf
+      sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895
+      uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf
+- <<: *llama3
+  name: "llama3-8b-instruct:Q6_K"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-8B-Instruct.Q6_K.gguf
+    files:
+    - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf
+      sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a
+      uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf
+- <<: *llama3
+  name: "llama3-70b-instruct"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
+    files:
+    - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
+      sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
+- <<: *llama3
+  name: "llama-3-unholy-8b"
+  urls:
+    - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF
+  icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png
+  description: |
+    Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tells you to do.
+    Basic uncensoring, this model is epoch 3 out of 4 (but it seems enough at 3).
+
+    If you are censored, it's maybe because of keywords like "assistant", "Factual answer", or other "sweet words" like I call them.
+  overrides:
+    parameters:
+      model: Llama-3-Unholy-8B.q4_k_m.gguf
+    files:
+    - filename: Llama-3-Unholy-8B.q4_k_m.gguf
+      sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602
+      uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf
+- <<: *llama3
+  name: "llama-3-unholy-8b:Q8_0"
+  urls:
+    - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF
+  icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png
+  description: |
+    Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tells you to do.
+
+    Basic uncensoring, this model is epoch 3 out of 4 (but it seems enough at 3).
+
+    If you are censored, it's maybe because of keywords like "assistant", "Factual answer", or other "sweet words" like I call them.
+  overrides:
+    parameters:
+      model: Llama-3-Unholy-8B.q8_0.gguf
+    files:
+    - filename: Llama-3-Unholy-8B.q8_0.gguf
+      sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702
+      uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf
+- <<: *llama3
+  name: "therapyllama-8b-v1"
+  urls:
+    - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png
+  description: |
+    Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic.
+
+    It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2
+
+    TherapyLlama is hopefully aligned to be helpful, healthy, and comforting.
+    Usage:
+    Do not hold back on Buddy.
+    Open up to Buddy.
+    Pour your heart out to Buddy.
+    Engage with Buddy.
+    Remember that Buddy is just an AI.
+    Notes:
+
+    Tested with the Llama 3 Format
+    You might be assigned a random name if you don't give yourself one.
+    Chat format was pretty stale?
+
+    Disclaimer
+
+    TherapyLlama is NOT a real therapist. It is a friendly AI that mimics empathy and psychotherapy. It is an illusion without the slightest clue who you are as a person. As much as it can help you with self-discovery, A LLAMA IS NOT A SUBSTITUTE for a real professional.
+  overrides:
+    parameters:
+      model: TherapyLlama-8B-v1-Q4_K_M.gguf
+    files:
+    - filename: TherapyLlama-8B-v1-Q4_K_M.gguf
+      sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a
+      uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf
+- <<: *llama3
+  name: "aura-uncensored-l3-8b-iq-imatrix"
+  urls:
+    - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix
+  icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png
+  description: |
+    This is another, better attempt at a less censored Llama-3 with hopefully more stable formatting.
+  overrides:
+    parameters:
+      model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
+    files:
+    - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
+      sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2
+      uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
+- &dolphin
+  name: "dolphin-2.9-llama3-8b"
+  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
+  urls:
+    - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf
+  tags:
+  - llm
+  - gguf
+  - gpu
+  - cpu
+  - llama3
+  license: llama3
+  description: |
+    Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
+    Dolphin is uncensored.
+ Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations + icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png + overrides: + parameters: + model: dolphin-2.9-llama3-8b-q4_K_M.gguf + files: + - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf + sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf +- <<: *dolphin + name: "dolphin-2.9-llama3-8b:Q6_K" + overrides: + parameters: + model: dolphin-2.9-llama3-8b-q6_K.gguf + files: + - filename: dolphin-2.9-llama3-8b-q6_K.gguf + sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf +## LLama2 and derivatives + +### Start noromaid +- &noromaid + url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" + name: "noromaid-13b-0.4-DPO" + icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png + license: cc-by-nc-4.0 + urls: + - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF + tags: + - llm + - llama2 + - gguf + - gpu + - cpu + overrides: + parameters: + model: Noromaid-13B-0.4-DPO.q4_k_m.gguf + files: + - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf + sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 + uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" @@ -50,6 +237,7 @@ - multimodal - gguf - gpu + - llama2 - cpu name: "llava-1.6-vicuna" overrides: @@ -117,6 +305,7 @@ - llm - gguf - gpu + - llama2 - cpu name: "phi-2-chat:Q8_0" overrides: @@ -149,6 +338,7 @@ tags: - llm - gguf + - llama2 - gpu - cpu name: "phi-2-orange" @@ -175,6 +365,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -217,6 +408,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -262,6 +454,7 @@ - llm - gguf - gpu + - llama2 - cpu overrides: parameters: @@ -281,6 +474,7 @@ - gpu - cpu - embeddings + - python name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -302,6 +496,7 @@ tags: - text-to-image - stablediffusion + - python - sd-1.5 - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" diff --git a/gallery/noromaid.yaml b/gallery/noromaid.yaml new file mode 100644 index 00000000..0b9badfe --- /dev/null +++ b/gallery/noromaid.yaml @@ -0,0 +1,53 @@ +config_file: | + mmap: true + backend: llama-cpp + template: + chat_message: | + <|im_{{if eq .RoleName "assistant"}}bot{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}|> + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + + {{- else if eq .RoleName "tool" }} + + {{- end }}<|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_system|> + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input -}} + <|im_bot|> + + chat: | + {{.Input -}} + <|im_bot|> + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + - + - "\n" + - "\n\n\n" + diff --git a/gallery/parler-tts.yaml b/gallery/parler-tts.yaml new file mode 100644 index 00000000..76252b1d --- /dev/null +++ b/gallery/parler-tts.yaml @@ -0,0 +1,2 @@ +config_file: | + backend: parler-tts diff --git a/gallery/rerankers.yaml b/gallery/rerankers.yaml new file mode 100644 index 00000000..dbbad5a0 --- /dev/null +++ b/gallery/rerankers.yaml @@ -0,0 +1,2 @@ +config_file: | + backend: rerankers \ No newline at end of file diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go index d3956b63..c0768051 100644 --- a/pkg/model/loader_test.go +++ b/pkg/model/loader_test.go @@ -24,8 +24,7 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq {{- else if eq .RoleName "tool" }} -{{- end }} -<|im_end|>` +{{- end }}<|im_end|>` const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> @@ -107,7 +106,7 @@ var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]in var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ "user": { "template": chatML, - "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "user", @@ -122,7 +121,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "assistant": { "template": chatML, - "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>", + "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "assistant", @@ -137,7 +136,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "function_call": { "template": chatML, - "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n\n<|im_end|>", + "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "assistant", @@ -152,7 +151,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in }, "function_response": { "template": chatML, - "expected": "<|im_start|>tool\n\nResponse from tool\n\n<|im_end|>", + "expected": "<|im_start|>tool\n\nResponse from tool\n<|im_end|>", "data": model.ChatMessageTemplateData{ SystemPrompt: "", Role: "tool", From 758b0c904294d397d540cdc31a40de25945beb99 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 06:49:29 +0000 Subject: [PATCH 0121/2648] build(deps): bump pydantic from 1.10.7 to 1.10.13 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2125) build(deps): bump pydantic Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [pydantic](https://github.com/pydantic/pydantic). Updates `pydantic` from 1.10.7 to 1.10.13 - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v1.10.7...v1.10.13) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production dependency-group: pip ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index ba7f8429..68a960a6 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -20,7 +20,7 @@ numpy==1.24.3 openai==0.27.6 openapi-schema-pydantic==1.2.4 packaging==23.1 -pydantic==1.10.7 +pydantic==1.10.13 PyYAML==6.0 requests==2.31.0 SQLAlchemy==2.0.12 From 60690c9fc4da2246b006b4a8c95355431d4ec20a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:11:01 +0200 Subject: [PATCH 0122/2648] ci: add swagger pipeline Signed-off-by: Ettore Di Giacinto --- .github/workflows/update_swagger.yaml | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/update_swagger.yaml diff --git a/.github/workflows/update_swagger.yaml b/.github/workflows/update_swagger.yaml new file mode 100644 index 00000000..878f5a72 --- /dev/null +++ b/.github/workflows/update_swagger.yaml @@ -0,0 +1,31 @@ +name: Update swagger +on: + schedule: + - cron: 0 20 * * * + workflow_dispatch: +jobs: + swagger: + strategy: + fail-fast: false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: 'stable' + - run: | + go install github.com/swaggo/swag/cmd/swag@latest + - name: Bump swagger 🔧 + run: | + make swagger + - name: Create Pull Request + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.UPDATE_BOT_TOKEN }} + push-to-fork: ci-forks/LocalAI + commit-message: 'feat(swagger): update swagger' + title: 'feat(swagger): update swagger' + branch: "update/swagger" + body: Update swagger + signoff: true + From aa8e1c63d523ef8333ab01e010629b0848d4ded5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:52:52 +0200 Subject: [PATCH 0123/2648] Create yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/yaml-check.yml diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml new file mode 100644 index 00000000..e2c45718 --- /dev/null +++ b/.github/workflows/yaml-check.yml @@ -0,0 +1,20 @@ +name: json-yaml-validate +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + json-yaml-validate: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: json-yaml-validate + id: json-yaml-validate + uses: GrantBirki/json-yaml-validate@v2.7.1 From 1b0a64aa46ed5c29828b09452e5ecadc5b71cbbf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 15:57:06 +0200 Subject: [PATCH 0124/2648] Update yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml index e2c45718..f83f03aa 100644 --- a/.github/workflows/yaml-check.yml +++ b/.github/workflows/yaml-check.yml @@ -18,3 +18,5 @@ jobs: - name: json-yaml-validate id: json-yaml-validate uses: GrantBirki/json-yaml-validate@v2.7.1 + with: + base_dir: ./gallery From 5d170e926461d4f73d5e92655b98d50175876268 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 16:05:02 +0200 Subject: [PATCH 0125/2648] Update yaml-check.yml Signed-off-by: Ettore Di Giacinto --- .github/workflows/yaml-check.yml | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/yaml-check.yml b/.github/workflows/yaml-check.yml index f83f03aa..68b5e987 100644 --- a/.github/workflows/yaml-check.yml +++ b/.github/workflows/yaml-check.yml @@ -1,22 +1,18 @@ -name: json-yaml-validate +name: 'Yamllint GitHub Actions' on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -permissions: - contents: read - + - pull_request jobs: - json-yaml-validate: + yamllint: + name: 'Yamllint' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - name: json-yaml-validate - id: json-yaml-validate - uses: GrantBirki/json-yaml-validate@v2.7.1 + - name: 'Checkout' + uses: actions/checkout@master + - name: 'Yamllint' + uses: karancode/yamllint-github-action@master with: - base_dir: ./gallery + yamllint_file_or_dir: 'gallery' + yamllint_strict: false + yamllint_comment: true + env: + GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 2ada13b1add9cd6eb126517f85f27f395bfdd921 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Apr 2024 16:06:18 +0200 Subject: [PATCH 0126/2648] models(gallery): add more models (#2129) Signed-off-by: Ettore Di Giacinto --- gallery/cerbero.yaml | 19 ++++++ gallery/index.yaml | 139 ++++++++++++++++++++++++++++++++++++++- gallery/vicuna-chat.yaml | 21 ++++++ 3 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 gallery/cerbero.yaml create mode 100644 gallery/vicuna-chat.yaml diff --git a/gallery/cerbero.yaml b/gallery/cerbero.yaml new file mode 100644 index 00000000..265d4019 --- /dev/null +++ b/gallery/cerbero.yaml @@ -0,0 +1,19 @@ +config_file: | + backend: llama-cpp + context_size: 8192 + f16: false + name: cerbero + + template: + completion: "{{.Input}}" + chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " + roles: + user: "[|Umano|] " + system: "[|Umano|] " + assistant: "[|Assistente|] " + + stopwords: + - "[|Umano|]" + + trimsuffix: + - "\n" diff --git a/gallery/index.yaml b/gallery/index.yaml index a5de760d..8edb9df1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -82,6 +82,65 @@ - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-sauerkrautlm-8b-instruct" + urls: + - 
https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF + icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png + description: | + SauerkrautLM-llama-3-8B-Instruct + + Model Type: Llama-3-SauerkrautLM-8b-Instruct is a finetuned Model based on meta-llama/Meta-Llama-3-8B-Instruct + Language(s): German, English + overrides: + parameters: + model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + files: + - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 + uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-13b-instruct-v0.1" + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF + icon: https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1/resolve/main/llama-3-merges.webp + description: | + This model is a self-merge of meta-llama/Meta-Llama-3-8B-Instruct model. + overrides: + parameters: + model: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + files: + - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 + uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-smaug-8b" + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/64c14f95cac5f9ba52bbcd7f/OrcJyTaUtD2HxJOPPwNva.png + description: | + This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B. + overrides: + parameters: + model: Llama-3-Smaug-8B.Q4_K_M.gguf + files: + - filename: Llama-3-Smaug-8B.Q4_K_M.gguf + sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 + uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf +- <<: *llama3 + name: "llama-3-8b-openhermes-dpo" + urls: + - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/64fc6d81d75293f417fee1d1/QF2OsDu9DJKP4QYPBu4aK.png + description: | + Llama3-8B-OpenHermes-DPO is DPO-Finetuned model of Llama3-8B, on the OpenHermes-2.5 preference dataset using QLoRA. + overrides: + parameters: + model: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + files: + - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca + uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - <<: *llama3 name: "llama-3-unholy-8b" urls: @@ -100,6 +159,42 @@ - filename: Llama-3-Unholy-8B.q4_k_m.gguf sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf +- <<: *llama3 + name: "lexi-llama-3-8b-uncensored" + urls: + - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/H6axm5mlmiOWnbIFvx_em.png + description: | + Lexi is uncensored, which makes the model compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. + + You are responsible for any content you create using this model. Please use it responsibly. 
+ + Lexi is licensed according to Meta's Llama license. I grant permission for any use, including commercial, that falls within accordance with Meta's Llama-3 license. + overrides: + parameters: + model: lexi-llama-3-8b-uncensored.Q6_K.gguf + files: + - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf + sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 + uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- <<: *llama3 + name: "chaos-rp_l3_b-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/u5p9kdbXT2QQA3iMU0vF1.png + description: | + A chaotic force beckons for you, will you heed her call? + + Built upon an intelligent foundation and tuned for roleplaying, this model will fulfill your wildest fantasies with the bare minimum of effort. + + Enjoy! + overrides: + parameters: + model: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + files: + - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 + uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: @@ -199,7 +294,30 @@ sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf ## LLama2 and derivatives +### Start Fimbulvetr +- &vicuna-chat + url: "github:mudler/LocalAI/gallery/vicuna-chat.yaml@master" + name: "fimbulvetr-11b-v2" + icon: https://huggingface.co/Sao10K/Fimbulvetr-11B-v2/resolve/main/cute1.jpg + license: llama2 + description: | + Cute girl to catch your attention. + urls: + - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + parameters: + model: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + files: + - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd + uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf ### Start noromaid - &noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" @@ -436,7 +554,26 @@ sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" ### END Hermes-2-Pro-Mistral - +### START Cerbero +- url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" + icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png + description: | + cerbero-7b is specifically crafted to fill the void in Italy's AI landscape. 
+ urls: + - https://huggingface.co/galatolo/cerbero-7b + tags: + - llm + - gguf + - gpu + - cpu + - mistral + overrides: + parameters: + model: galatolo-Q4_K.gguf + files: + - filename: "galatolo-Q4_K.gguf" + sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" + uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" ### START Codellama - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" diff --git a/gallery/vicuna-chat.yaml b/gallery/vicuna-chat.yaml new file mode 100644 index 00000000..9669cce0 --- /dev/null +++ b/gallery/vicuna-chat.yaml @@ -0,0 +1,21 @@ +name: "vicuna-chat" + +description: | + Vicuna chat + +license: "LLaMA" + +config_file: | + backend: llama-cpp + context_size: 4096 + roles: + user: "User: " + system: "System: " + assistant: "Assistant: " + f16: true + template: + completion: | + Complete the following sentence: {{.Input}} + chat: | + {{.Input}} + ASSISTANT: \ No newline at end of file From 4ae4e4450697ae3ff51f159fee4eeab83aeb08c4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:10:08 +0200 Subject: [PATCH 0127/2648] feat(swagger): update swagger (#2128) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 35 +++++++++++++++++------------------ swagger/swagger.json | 32 ++++++++++++++++---------------- swagger/swagger.yaml | 32 ++++++++++++++++---------------- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index e0199673..cc4fe085 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -1,5 +1,4 @@ -// Code generated by swaggo/swag. DO NOT EDIT. - +// Package swagger Code generated by swaggo/swag. 
DO NOT EDIT package swagger import "github.com/swaggo/swag" @@ -235,7 +234,7 @@ const docTemplate = `{ } }, "definitions": { - "grammar.Argument": { + "functions.Argument": { "type": "object", "properties": { "properties": { @@ -247,7 +246,7 @@ const docTemplate = `{ } } }, - "grammar.Function": { + "functions.Function": { "type": "object", "properties": { "description": { @@ -262,7 +261,7 @@ const docTemplate = `{ } } }, - "grammar.FunctionName": { + "functions.FunctionName": { "type": "object", "properties": { "const": { @@ -270,18 +269,18 @@ const docTemplate = `{ } } }, - "grammar.Item": { + "functions.Item": { "type": "object", "properties": { "properties": { - "$ref": "#/definitions/grammar.Properties" + "$ref": "#/definitions/functions.Properties" }, "type": { "type": "string" } } }, - "grammar.JSONFunctionStructure": { + "functions.JSONFunctionStructure": { "type": "object", "properties": { "$defs": { @@ -291,33 +290,33 @@ const docTemplate = `{ "anyOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } }, "oneOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } } } }, - "grammar.Properties": { + "functions.Properties": { "type": "object", "properties": { "arguments": { - "$ref": "#/definitions/grammar.Argument" + "$ref": "#/definitions/functions.Argument" }, "function": { - "$ref": "#/definitions/grammar.FunctionName" + "$ref": "#/definitions/functions.FunctionName" } } }, - "grammar.Tool": { + "functions.Tool": { "type": "object", "properties": { "function": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" }, "type": { "type": "string" @@ -565,7 +564,7 @@ const docTemplate = `{ "description": "A list of available functions to call", "type": "array", "items": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" } }, "grammar": { @@ -573,7 +572,7 @@ const docTemplate = `{ "type": "string" }, "grammar_json_functions": { - "$ref": "#/definitions/grammar.JSONFunctionStructure" + "$ref": "#/definitions/functions.JSONFunctionStructure" }, "ignore_eos": { "type": "boolean" @@ -673,7 +672,7 @@ const docTemplate = `{ "tools": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Tool" + "$ref": "#/definitions/functions.Tool" } }, "top_k": { diff --git a/swagger/swagger.json b/swagger/swagger.json index 4d7102c4..d7febeb3 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -227,7 +227,7 @@ } }, "definitions": { - "grammar.Argument": { + "functions.Argument": { "type": "object", "properties": { "properties": { @@ -239,7 +239,7 @@ } } }, - "grammar.Function": { + "functions.Function": { "type": "object", "properties": { "description": { @@ -254,7 +254,7 @@ } } }, - "grammar.FunctionName": { + "functions.FunctionName": { "type": "object", "properties": { "const": { @@ -262,18 +262,18 @@ } } }, - "grammar.Item": { + "functions.Item": { "type": "object", "properties": { "properties": { - "$ref": "#/definitions/grammar.Properties" + "$ref": "#/definitions/functions.Properties" }, "type": { "type": "string" } } }, - "grammar.JSONFunctionStructure": { + "functions.JSONFunctionStructure": { "type": "object", "properties": { "$defs": { @@ -283,33 +283,33 @@ "anyOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": "#/definitions/functions.Item" } }, "oneOf": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Item" + "$ref": 
"#/definitions/functions.Item" } } } }, - "grammar.Properties": { + "functions.Properties": { "type": "object", "properties": { "arguments": { - "$ref": "#/definitions/grammar.Argument" + "$ref": "#/definitions/functions.Argument" }, "function": { - "$ref": "#/definitions/grammar.FunctionName" + "$ref": "#/definitions/functions.FunctionName" } } }, - "grammar.Tool": { + "functions.Tool": { "type": "object", "properties": { "function": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" }, "type": { "type": "string" @@ -557,7 +557,7 @@ "description": "A list of available functions to call", "type": "array", "items": { - "$ref": "#/definitions/grammar.Function" + "$ref": "#/definitions/functions.Function" } }, "grammar": { @@ -565,7 +565,7 @@ "type": "string" }, "grammar_json_functions": { - "$ref": "#/definitions/grammar.JSONFunctionStructure" + "$ref": "#/definitions/functions.JSONFunctionStructure" }, "ignore_eos": { "type": "boolean" @@ -665,7 +665,7 @@ "tools": { "type": "array", "items": { - "$ref": "#/definitions/grammar.Tool" + "$ref": "#/definitions/functions.Tool" } }, "top_k": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 86caff8a..919dd896 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -1,6 +1,6 @@ basePath: / definitions: - grammar.Argument: + functions.Argument: properties: properties: additionalProperties: true @@ -8,7 +8,7 @@ definitions: type: type: string type: object - grammar.Function: + functions.Function: properties: description: type: string @@ -18,43 +18,43 @@ definitions: additionalProperties: true type: object type: object - grammar.FunctionName: + functions.FunctionName: properties: const: type: string type: object - grammar.Item: + functions.Item: properties: properties: - $ref: '#/definitions/grammar.Properties' + $ref: '#/definitions/functions.Properties' type: type: string type: object - grammar.JSONFunctionStructure: + functions.JSONFunctionStructure: properties: $defs: additionalProperties: true type: object anyOf: items: - $ref: '#/definitions/grammar.Item' + $ref: '#/definitions/functions.Item' type: array oneOf: items: - $ref: '#/definitions/grammar.Item' + $ref: '#/definitions/functions.Item' type: array type: object - grammar.Properties: + functions.Properties: properties: arguments: - $ref: '#/definitions/grammar.Argument' + $ref: '#/definitions/functions.Argument' function: - $ref: '#/definitions/grammar.FunctionName' + $ref: '#/definitions/functions.FunctionName' type: object - grammar.Tool: + functions.Tool: properties: function: - $ref: '#/definitions/grammar.Function' + $ref: '#/definitions/functions.Function' type: type: string type: object @@ -221,13 +221,13 @@ definitions: functions: description: A list of available functions to call items: - $ref: '#/definitions/grammar.Function' + $ref: '#/definitions/functions.Function' type: array grammar: description: A grammar to constrain the LLM output type: string grammar_json_functions: - $ref: '#/definitions/grammar.JSONFunctionStructure' + $ref: '#/definitions/functions.JSONFunctionStructure' ignore_eos: type: boolean input: {} @@ -297,7 +297,7 @@ definitions: tool_choice: {} tools: items: - $ref: '#/definitions/grammar.Tool' + $ref: '#/definitions/functions.Tool' type: array top_k: type: integer From 45761f8be22075a85134ce79e5070f92430c3f3b Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 25 Apr 2024 13:25:56 -0400 Subject: [PATCH 0128/2648] fix: yamlint warnings and errors (#2131) fix yamlint warnings and errors Signed-off-by: 
Dave Lee --- .yamllint | 4 + gallery/bert-embeddings.yaml | 7 +- gallery/cerbero.yaml | 3 +- gallery/codellama.yaml | 3 +- gallery/dreamshaper.yaml | 2 +- gallery/hermes-2-pro-mistral.yaml | 3 +- gallery/index.yaml | 528 +++++++++++++++--------------- gallery/llama3-instruct.yaml | 2 +- gallery/llava.yaml | 2 +- gallery/noromaid.yaml | 2 +- gallery/parler-tts.yaml | 1 + gallery/phi-2-chat.yaml | 3 +- gallery/phi-2-orange.yaml | 1 + gallery/phi-3-chat.yaml | 2 +- gallery/piper.yaml | 1 + gallery/rerankers.yaml | 3 +- gallery/sentencetransformers.yaml | 3 +- gallery/stablediffusion.yaml | 79 ++--- gallery/tinydream.yaml | 3 +- gallery/vicuna-chat.yaml | 3 +- gallery/virtual.yaml | 3 +- gallery/whisper-base.yaml | 8 +- 22 files changed, 340 insertions(+), 326 deletions(-) create mode 100644 .yamllint diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000..8b8a89eb --- /dev/null +++ b/.yamllint @@ -0,0 +1,4 @@ +extends: default + +rules: + line-length: disable \ No newline at end of file diff --git a/gallery/bert-embeddings.yaml b/gallery/bert-embeddings.yaml index 01f05f33..7ce61799 100644 --- a/gallery/bert-embeddings.yaml +++ b/gallery/bert-embeddings.yaml @@ -1,3 +1,4 @@ +--- name: "bert-embeddings" config_file: | @@ -6,6 +7,6 @@ config_file: | backend: bert-embeddings embeddings: true files: -- filename: "bert-MiniLM-L6-v2q4_0.bin" - sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" - uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" \ No newline at end of file + - filename: "bert-MiniLM-L6-v2q4_0.bin" + sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad" + uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin" diff --git a/gallery/cerbero.yaml b/gallery/cerbero.yaml index 265d4019..e3e857b9 100644 --- a/gallery/cerbero.yaml +++ b/gallery/cerbero.yaml @@ -1,3 +1,4 @@ +--- config_file: | backend: llama-cpp context_size: 8192 @@ -15,5 +16,5 @@ config_file: | stopwords: - "[|Umano|]" - trimsuffix: + trimsuffix: - "\n" diff --git a/gallery/codellama.yaml b/gallery/codellama.yaml index a4c3233f..b02ad87e 100644 --- a/gallery/codellama.yaml +++ b/gallery/codellama.yaml @@ -1,7 +1,8 @@ +--- name: "codellama" config_file: | backend: llama-cpp context_size: 4096 f16: true - mmap: true \ No newline at end of file + mmap: true diff --git a/gallery/dreamshaper.yaml b/gallery/dreamshaper.yaml index 219a1e53..03146280 100644 --- a/gallery/dreamshaper.yaml +++ b/gallery/dreamshaper.yaml @@ -1,6 +1,6 @@ +--- name: "dreamshaper" - config_file: | backend: diffusers step: 25 diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index b1dc0ff1..6abee631 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -1,6 +1,6 @@ +--- name: "hermes-2-pro-mistral" - config_file: | mmap: true template: @@ -52,4 +52,3 @@ config_file: | - - "\n" - "\n\n\n" - diff --git a/gallery/index.yaml b/gallery/index.yaml index 8edb9df1..56e434c5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,4 @@ - +--- ### START parler-tts - &parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" @@ -9,13 +9,13 @@ description: | Parler-TTS is a lightweight text-to-speech (TTS) model that can generate high-quality, natural sounding speech in the style of a given speaker (gender, pitch, speaking style, etc). 
It is a reproduction of work from the paper Natural language guidance of high-fidelity text-to-speech with synthetic annotations by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. urls: - - https://github.com/huggingface/parler-tts + - https://github.com/huggingface/parler-tts tags: - - tts - - gpu - - cpu - - text-to-speech - - python + - tts + - gpu + - cpu + - text-to-speech + - python ### START rerankers - &rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" @@ -26,9 +26,9 @@ description: | A cross-encoder model that can be used for reranking tags: - - reranker - - gpu - - python + - reranker + - gpu + - python ## LLMs ### START LLAMA3 - &llama3 @@ -49,43 +49,43 @@ Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. urls: - - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct - - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF + - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + - https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 overrides: parameters: model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf files: - - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf - sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895 - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf + - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf + sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895 + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf - <<: *llama3 name: "llama3-8b-instruct:Q6_K" overrides: parameters: model: Meta-Llama-3-8B-Instruct.Q6_K.gguf files: - - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf - sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a - uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf + - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf + sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a + uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf - <<: *llama3 name: "llama3-70b-instruct" overrides: parameters: model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf files: - - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 - uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf + sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72 + uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf - <<: *llama3 name: "llama-3-sauerkrautlm-8b-instruct" urls: - - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF + - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png description: | SauerkrautLM-llama-3-8B-Instruct @@ -96,13 +96,13 @@ parameters: model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf files: - - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - sha256: 
5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 - uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + - filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf + sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314 + uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf - <<: *llama3 name: "llama-3-13b-instruct-v0.1" urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF + - https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF icon: https://huggingface.co/MaziyarPanahi/Llama-3-13B-Instruct-v0.1/resolve/main/llama-3-merges.webp description: | This model is a self-merge of meta-llama/Meta-Llama-3-8B-Instruct model. @@ -110,13 +110,13 @@ parameters: model: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf files: - - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 - uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + - filename: Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf + sha256: 071a28043c271d259b5ffa883d19a9e0b33269b55148c4abaf5f95da4d084266 + uri: huggingface://MaziyarPanahi/Llama-3-13B-Instruct-v0.1-GGUF/Llama-3-13B-Instruct-v0.1.Q4_K_M.gguf - <<: *llama3 name: "llama-3-smaug-8b" urls: - - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF + - https://huggingface.co/MaziyarPanahi/Llama-3-Smaug-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/64c14f95cac5f9ba52bbcd7f/OrcJyTaUtD2HxJOPPwNva.png description: | This model was built using the Smaug recipe for improving performance on real world multi-turn conversations applied to meta-llama/Meta-Llama-3-8B. @@ -124,13 +124,13 @@ parameters: model: Llama-3-Smaug-8B.Q4_K_M.gguf files: - - filename: Llama-3-Smaug-8B.Q4_K_M.gguf - sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 - uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf + - filename: Llama-3-Smaug-8B.Q4_K_M.gguf + sha256: b17c4c1144768ead9e8a96439165baf49e98c53d458b4da8827f137fbabf38c1 + uri: huggingface://MaziyarPanahi/Llama-3-Smaug-8B-GGUF/Llama-3-Smaug-8B.Q4_K_M.gguf - <<: *llama3 name: "llama-3-8b-openhermes-dpo" urls: - - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF + - https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/64fc6d81d75293f417fee1d1/QF2OsDu9DJKP4QYPBu4aK.png description: | Llama3-8B-OpenHermes-DPO is DPO-Finetuned model of Llama3-8B, on the OpenHermes-2.5 preference dataset using QLoRA. 
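The `- <<: *llama3` entries running through this diff rely on YAML anchors and merge keys: one base entry is declared with `&llama3`, and each variant merges it in via `<<:` before overriding the fields that differ. A minimal sketch of the mechanism, using a hypothetical `&base` anchor rather than the gallery's real ones:

- &base                      # declare the shared entry once
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  license: llama3
- <<: *base                  # merge every key from &base into this entry
  name: "example-variant"    # then override or add per-model fields
  overrides:
    parameters:
      model: example-variant.Q4_K_M.gguf

Note that merge keys replace top-level fields wholesale rather than deep-merging them, which is why each variant in the gallery restates its complete `overrides` and `files` blocks.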
@@ -138,13 +138,13 @@ parameters: model: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf files: - - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca - uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + - filename: Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf + sha256: 1147e5881cb1d67796916e6cab7dab0ae0f532a4c1e626c9e92861e5f67752ca + uri: huggingface://mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf - <<: *llama3 name: "llama-3-unholy-8b" urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png description: | Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. @@ -156,13 +156,13 @@ parameters: model: Llama-3-Unholy-8B.q4_k_m.gguf files: - - filename: Llama-3-Unholy-8B.q4_k_m.gguf - sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf + - filename: Llama-3-Unholy-8B.q4_k_m.gguf + sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf - <<: *llama3 name: "lexi-llama-3-8b-uncensored" urls: - - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF + - https://huggingface.co/NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/H6axm5mlmiOWnbIFvx_em.png description: | Lexi is uncensored, which makes the model compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. @@ -174,13 +174,13 @@ parameters: model: lexi-llama-3-8b-uncensored.Q6_K.gguf files: - - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf - sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 - uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf + - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf + sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 + uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf - <<: *llama3 name: "chaos-rp_l3_b-iq-imatrix" urls: - - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix + - https://huggingface.co/Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/u5p9kdbXT2QQA3iMU0vF1.png description: | A chaotic force beckons for you, will you heed her call? 
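Most of the churn in this patch is mechanical: with `extends: default`, yamllint's indentation rule requires sequence indentation to be consistent within each file, and this change settles on two-space-indented list items throughout the gallery, so every `urls:`, `tags:`, and `files:` block shifts right without its content changing. The shape of the change, sketched with a placeholder value:

# before: sequence items flush with the parent key
urls:
- https://example.invalid/some-model

# after: sequence items indented two spaces
urls:
  - https://example.invalid/some-model

The same pass adds the `---` document-start marker each gallery file was missing and a final newline where the diff previously noted `\ No newline at end of file`.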
@@ -192,13 +192,13 @@ parameters: model: Chaos_RP_l3_8B-Q4_K_M-imat.gguf files: - - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf - sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 - uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf + - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf + sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 + uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: - - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF + - https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png description: | Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do. @@ -210,20 +210,20 @@ parameters: model: Llama-3-Unholy-8B.q8_0.gguf files: - - filename: Llama-3-Unholy-8B.q8_0.gguf - sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702 - uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf + - filename: Llama-3-Unholy-8B.q8_0.gguf + sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702 + uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf - <<: *llama3 name: "therapyllama-8b-v1" urls: - - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF + - https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png description: | Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic. It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2 - TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. + TherapyLlama is hopefully aligned to be helpful, healthy, and comforting. Usage: Do not hold back on Buddy. Open up to Buddy. @@ -243,56 +243,56 @@ parameters: model: TherapyLlama-8B-v1-Q4_K_M.gguf files: - - filename: TherapyLlama-8B-v1-Q4_K_M.gguf - sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a - uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf + - filename: TherapyLlama-8B-v1-Q4_K_M.gguf + sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a + uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf - <<: *llama3 name: "aura-uncensored-l3-8b-iq-imatrix" urls: - - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix + - https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png description: | - This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. + This is another better atempt at a less censored Llama-3 with hopefully more stable formatting. 
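Every `files:` entry reshuffled here pairs a local filename with a download source and an integrity check: `uri` points at the weights (the `huggingface://owner/repo/file` shorthand used throughout this file), and `sha256` lets the downloader verify what it fetched. A sketch of the triplet with placeholder values — the digest below is illustrative, not a real hash:

files:
  - filename: example.Q4_K_M.gguf    # name the file is stored under locally
    sha256: 0000000000000000000000000000000000000000000000000000000000000000  # placeholder digest, not a real checksum
    uri: huggingface://example-org/example-repo/example.Q4_K_M.gguf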
overrides: parameters: model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf files: - - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 - uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf + sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 + uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" urls: - - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf + - https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 license: llama3 description: | Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling. - Dolphin is uncensored. + Dolphin is uncensored. Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png overrides: parameters: model: dolphin-2.9-llama3-8b-q4_K_M.gguf files: - - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf - sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 - uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf + - filename: dolphin-2.9-llama3-8b-q4_K_M.gguf + sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf - <<: *dolphin name: "dolphin-2.9-llama3-8b:Q6_K" overrides: parameters: model: dolphin-2.9-llama3-8b-q6_K.gguf files: - - filename: dolphin-2.9-llama3-8b-q6_K.gguf - sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 - uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf + - filename: dolphin-2.9-llama3-8b-q6_K.gguf + sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 + uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf ## LLama2 and derivatives ### Start Fimbulvetr - &vicuna-chat @@ -304,20 +304,20 @@ description: | Cute girl to catch your attention. 
urls: - - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF + - https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF tags: - - llm - - gguf - - gpu - - cpu - - llama3 + - llm + - gguf + - gpu + - cpu + - llama3 overrides: parameters: model: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf files: - - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf - sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd - uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf + sha256: 3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd + uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf ### Start noromaid - &noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" @@ -325,48 +325,48 @@ icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png license: cc-by-nc-4.0 urls: - - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF + - https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF tags: - - llm - - llama2 - - gguf - - gpu - - cpu + - llm + - llama2 + - gguf + - gpu + - cpu overrides: parameters: model: Noromaid-13B-0.4-DPO.q4_k_m.gguf files: - - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf - sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 - uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf + - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf + sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 + uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 description: | - LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. + LLaVA represents a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking spirits of the multimodal GPT-4 and setting a new state-of-the-art accuracy on Science QA. 
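The LLaVA overrides that follow add an `mmproj:` field next to the usual model parameter: the GGUF file under `parameters.model` is the language model, while the mmproj file is the multimodal projector that maps the vision encoder's output into the LLM's embedding space, so both must be listed under `files:` and downloaded together. A sketch with hypothetical filenames:

overrides:
  mmproj: example-mmproj-f16.gguf    # vision projector, loaded alongside the LLM
  parameters:
    model: example-llava.Q4_K.gguf   # the language model itself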
urls: - - https://llava-vl.github.io/ + - https://llava-vl.github.io/ tags: - - llm - - multimodal - - gguf - - gpu - - llama2 - - cpu + - llm + - multimodal + - gguf + - gpu + - llama2 + - cpu name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf parameters: model: vicuna-7b-q5_k.gguf files: - - filename: vicuna-7b-q5_k.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf - - filename: mmproj-vicuna7b-f16.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf + - filename: vicuna-7b-q5_k.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf + - filename: mmproj-vicuna7b-f16.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf - <<: *llava name: "llava-1.6-mistral" overrides: @@ -374,12 +374,12 @@ parameters: model: llava-v1.6-mistral-7b.gguf files: - - filename: llava-v1.6-mistral-7b.gguf - sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf - - filename: llava-v1.6-7b-mmproj-f16.gguf - sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16 - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf + - filename: llava-v1.6-mistral-7b.gguf + sha256: 31826170ffa2e8080bbcd74cac718f906484fd5a59895550ef94c1baa4997595 + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf + - filename: llava-v1.6-7b-mmproj-f16.gguf + sha256: 00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16 + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - <<: *llava name: "llava-1.5" overrides: @@ -387,12 +387,12 @@ parameters: model: llava-v1.5-7b-Q4_K.gguf files: - - filename: llava-v1.5-7b-Q4_K.gguf - sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9 - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf - - filename: llava-v1.5-7b-mmproj-Q8_0.gguf - sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf + - filename: llava-v1.5-7b-Q4_K.gguf + sha256: c91ebf0a628ceb25e374df23ad966cc1bf1514b33fecf4f0073f9619dec5b3f9 + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf + - filename: llava-v1.5-7b-mmproj-Q8_0.gguf + sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf ### START Phi-2 - &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" @@ -416,32 +416,32 @@ Finetuned from model: Phi-2 urls: - - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml - - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml + - https://huggingface.co/l3utterfly/phi-2-layla-v1-chatml-gguf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu name: "phi-2-chat:Q8_0" overrides: parameters: model: phi-2-layla-v1-chatml-Q8_0.gguf files: - - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" - sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" - uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" + - filename: "phi-2-layla-v1-chatml-Q8_0.gguf" + sha256: "0cf542a127c2c835066a78028009b7eddbaf773cc2a26e1cb157ce5e09c1a2e0" + uri: 
"huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf" - <<: *phi-2-chat name: "phi-2-chat" overrides: parameters: model: phi-2-layla-v1-chatml-Q4_K.gguf files: - - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" - sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" - uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" + - filename: "phi-2-layla-v1-chatml-Q4_K.gguf" + sha256: "b071e5624b60b8911f77261398802c4b4079c6c689e38e2ce75173ed62bc8a48" + uri: "huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q4_K.gguf" - <<: *phi-2-chat license: mit icon: "https://huggingface.co/rhysjones/phi-2-orange/resolve/main/phi-2-orange.jpg" @@ -450,23 +450,23 @@ There is an updated model at rhysjones/phi-2-orange-v2 which has higher evals, if you wish to test. urls: - - https://huggingface.co/rhysjones/phi-2-orange - - https://huggingface.co/TheBloke/phi-2-orange-GGUF + - https://huggingface.co/rhysjones/phi-2-orange + - https://huggingface.co/TheBloke/phi-2-orange-GGUF tags: - - llm - - gguf - - llama2 - - gpu - - cpu + - llm + - gguf + - llama2 + - gpu + - cpu name: "phi-2-orange" overrides: parameters: model: phi-2-orange.Q4_0.gguf files: - - filename: "phi-2-orange.Q4_0.gguf" - sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf" - uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf" + - filename: "phi-2-orange.Q4_0.gguf" + sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf" + uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf" ### START Phi-3 - &phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" @@ -477,30 +477,30 @@ The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. 
urls: - - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf + - https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: Phi-3-mini-4k-instruct-q4.gguf files: - - filename: "Phi-3-mini-4k-instruct-q4.gguf" - sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" + - filename: "Phi-3-mini-4k-instruct-q4.gguf" + sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" + uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" - <<: *phi-3 name: "phi-3-mini-4k-instruct:fp16" overrides: parameters: model: Phi-3-mini-4k-instruct-fp16.gguf files: - - filename: "Phi-3-mini-4k-instruct-fp16.gguf" - sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605" - uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" + - filename: "Phi-3-mini-4k-instruct-fp16.gguf" + sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605" + uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf" ### START Hermes-2-Pro-Mistral - &hermes-2-pro-mistral url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" @@ -520,39 +520,39 @@ Learn more about the function calling on our github repo here: https://github.com/NousResearch/Hermes-Function-Calling/tree/main urls: - - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF + - https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q4_0.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf" - sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q4_0.gguf" + sha256: "f446c3125026f7af6757dd097dda02280adc85e908c058bd6f1c41a118354745" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_0.gguf" - <<: *hermes-2-pro-mistral name: "hermes-2-pro-mistral:Q6_K" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q6_K.gguf" + sha256: "40adc3b227bc36764de148fdda4df5df385adc06650d58d4dbe726ee0214eeff" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf" - <<: *hermes-2-pro-mistral name: "hermes-2-pro-mistral:Q8_0" overrides: parameters: model: Hermes-2-Pro-Mistral-7B.Q8_0.gguf files: - - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" - sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" - uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" + - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" + sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" + uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" ### END Hermes-2-Pro-Mistral ### START Cerbero - url: 
"github:mudler/LocalAI/gallery/cerbero.yaml@master" @@ -560,20 +560,20 @@ description: | cerbero-7b is specifically crafted to fill the void in Italy's AI landscape. urls: - - https://huggingface.co/galatolo/cerbero-7b + - https://huggingface.co/galatolo/cerbero-7b tags: - - llm - - gguf - - gpu - - cpu - - mistral + - llm + - gguf + - gpu + - cpu + - mistral overrides: parameters: model: galatolo-Q4_K.gguf files: - - filename: "galatolo-Q4_K.gguf" - sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" - uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" + - filename: "galatolo-Q4_K.gguf" + sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" + uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" ### START Codellama - &codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" @@ -584,34 +584,34 @@ Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. This model is designed for general code synthesis and understanding. urls: - - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF - - https://huggingface.co/meta-llama/CodeLlama-7b-hf + - https://huggingface.co/TheBloke/CodeLlama-7B-GGUF + - https://huggingface.co/meta-llama/CodeLlama-7b-hf tags: - - llm - - gguf - - gpu - - llama2 - - cpu + - llm + - gguf + - gpu + - llama2 + - cpu overrides: parameters: model: codellama-7b.Q4_0.gguf files: - - filename: "codellama-7b.Q4_0.gguf" - sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" - uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" + - filename: "codellama-7b.Q4_0.gguf" + sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" + uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" ### START Embeddings - &sentencentransformers description: | This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. urls: - - https://github.com/UKPLab/sentence-transformers + - https://github.com/UKPLab/sentence-transformers tags: - - gpu - - cpu - - embeddings - - python + - gpu + - cpu + - embeddings + - python name: "all-MiniLM-L6-v2" url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master" overrides: @@ -628,42 +628,42 @@ A text-to-image model that uses Stable Diffusion 1.5 to generate images from text prompts. This model is DreamShaper model by Lykon. 
urls: - - https://civitai.com/models/4384/dreamshaper + - https://civitai.com/models/4384/dreamshaper tags: - - text-to-image - - stablediffusion - - python - - sd-1.5 - - gpu + - text-to-image + - stablediffusion + - python + - sd-1.5 + - gpu url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master" overrides: parameters: model: DreamShaper_8_pruned.safetensors files: - - filename: DreamShaper_8_pruned.safetensors - uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors - sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd + - filename: DreamShaper_8_pruned.safetensors + uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors + sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd ## Whisper - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" urls: - - https://github.com/ggerganov/whisper.cpp - - https://huggingface.co/ggerganov/whisper.cpp + - https://github.com/ggerganov/whisper.cpp + - https://huggingface.co/ggerganov/whisper.cpp description: | Port of OpenAI's Whisper model in C/C++ - + ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" license: "Apache 2.0" urls: - - https://huggingface.co/skeskinen/ggml + - https://huggingface.co/skeskinen/ggml tags: - - embeddings + - embeddings description: | Bert model that can be used for embeddings @@ -671,13 +671,13 @@ - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master license: "BSD-3" urls: - - https://github.com/EdVince/Stable-Diffusion-NCNN - - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE + - https://github.com/EdVince/Stable-Diffusion-NCNN + - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img + Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp - + ## Tiny Dream - url: github:mudler/LocalAI/gallery/tinydream.yaml@master name: tinydream @@ -702,9 +702,9 @@ A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. Piper is used in a variety of [projects](https://github.com/rhasspy/piper#people-using-piper). 
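Each voice entry below follows one pattern: merge the `&piper` base with `<<: *piper`, point `parameters.model` at a voice-specific `.onnx` file, and list a `.tar.gz` release archive that unpacks to that model plus its JSON config. Assembled into one piece — the download URL is inferred from the `v0.0.2` release pattern visible at the end of this section, so treat it as illustrative:

- <<: *piper
  url: github:mudler/LocalAI/gallery/piper.yaml@master
  name: voice-en-us-amy-low
  override:
    parameters:
      model: en-us-amy-low.onnx      # the voice model inside the archive
  files:
    - filename: voice-en-us-amy-low.tar.gz
      uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz  # inferred URL, shown for illustration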
tags: - - tts - - text-to-speech - - cpu + - tts + - text-to-speech + - cpu override: parameters: @@ -786,7 +786,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-de-thorsten-low - + override: parameters: model: de-thorsten-low.onnx @@ -796,7 +796,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-el-gr-rapunzelina-low - + override: parameters: model: el-gr-rapunzelina-low.onnx @@ -806,7 +806,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-alan-low - + override: parameters: model: en-gb-alan-low.onnx @@ -816,7 +816,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-gb-southern_english_female-low - + override: parameters: model: en-gb-southern_english @@ -826,7 +826,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-amy-low - + override: parameters: model: en-us-amy-low.onnx @@ -836,7 +836,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-danny-low - + override: parameters: model: en-us-danny-low.onnx @@ -846,7 +846,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low - + override: parameters: model: en-us-kathleen-low.onnx @@ -856,7 +856,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-low - + override: parameters: model: en-us-lessac-low.onnx @@ -866,7 +866,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-lessac-medium - + override: parameters: model: en-us-lessac-medium.onnx @@ -876,7 +876,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-libritts-high - + override: parameters: model: en-us-libritts-high.onnx @@ -886,7 +886,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-high - + override: parameters: model: en-us-ryan-high.onnx @@ -896,7 +896,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-low - + override: parameters: model: en-us-ryan-low.onnx @@ -907,7 +907,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-ryan-medium - + override: parameters: model: en-us-ryan-medium.onnx @@ -938,7 +938,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_10246-low - + override: parameters: model: es-mls_10246-low.onnx @@ -949,7 +949,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-es-mls_9972-low - + override: parameters: model: es-mls_9972-low.onnx @@ -960,7 +960,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fi-harri-low - + override: parameters: model: fi-harri-low.onnx @@ -971,7 +971,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-gilles-low - + override: parameters: model: fr-gilles-low.onnx @@ -982,7 +982,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-mls_1840-low - + override: parameters: model: fr-mls_1840-low.onnx @@ -993,7 +993,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-low - + override: parameters: model: fr-siwis-low.onnx @@ -1004,7 +1004,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-fr-siwis-medium - + override: parameters: model: fr-siwis-medium.onnx @@ -1015,7 +1015,7 @@ - <<: *piper url: 
github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-bui-medium - + override: parameters: model: is-bui-medium.onnx @@ -1026,7 +1026,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-salka-medium - + override: parameters: model: is-salka-medium.onnx @@ -1037,7 +1037,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-steinn-medium - + override: parameters: model: is-steinn-medium.onnx @@ -1048,7 +1048,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-is-ugla-medium - + override: parameters: model: is-ugla-medium.onnx @@ -1059,7 +1059,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-it-riccardo_fasol-x-low - + override: parameters: model: it-riccardo_fasol-x-low.onnx @@ -1070,7 +1070,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-iseke-x-low - + override: parameters: model: kk-iseke-x-low.onnx @@ -1081,7 +1081,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-issai-high - + override: parameters: model: kk-issai-high.onnx @@ -1092,7 +1092,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-kk-raya-x-low - + override: parameters: model: kk-raya-x-low.onnx @@ -1103,7 +1103,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-medium - + override: parameters: model: ne-google-medium.onnx @@ -1114,7 +1114,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ne-google-x-low - + override: parameters: model: ne-google-x-low.onnx @@ -1125,7 +1125,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_5809-low - + override: parameters: model: nl-mls_5809-low.onnx @@ -1136,7 +1136,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-mls_7432-low - + override: parameters: model: nl-mls_7432-low.onnx @@ -1147,7 +1147,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-nathalie-x-low - + override: parameters: model: nl-nathalie-x-low.onnx @@ -1158,7 +1158,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-medium - + override: parameters: model: nl-rdh-medium.onnx @@ -1169,7 +1169,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-nl-rdh-x-low - + override: parameters: model: nl-rdh-x-low.onnx @@ -1180,7 +1180,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-no-talesyntese-medium - + override: parameters: model: no-talesyntese-medium.onnx @@ -1191,7 +1191,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pl-mls_6892-low - + override: parameters: model: pl-mls_6892-low.onnx @@ -1202,7 +1202,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-pt-br-edresson-low - + override: parameters: model: pt-br-edresson-low.onnx @@ -1213,7 +1213,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-ru-irinia-medium - + override: parameters: model: ru-irinia-medium.onnx @@ -1224,7 +1224,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-sv-se-nst-medium - + override: parameters: model: sv-se-nst-medium.onnx @@ -1235,7 +1235,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-uk-lada-x-low - + override: parameters: model: uk-lada-x-low.onnx @@ 
-1246,7 +1246,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-25hours-single-low - + override: parameters: model: vi-25hours-single-low.onnx @@ -1257,7 +1257,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-vi-vivos-x-low - + override: parameters: model: vi-vivos-x-low.onnx @@ -1268,7 +1268,7 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh-cn-huayan-x-low - + override: parameters: model: zh-cn-huayan-x-low.onnx @@ -1279,10 +1279,10 @@ - <<: *piper url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-zh_CN-huayan-medium - + override: parameters: model: zh_CN-huayan-medium.onnx files: - filename: voice-zh_CN-huayan-medium.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz \ No newline at end of file + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 96272c58..f6016cbd 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -1,6 +1,6 @@ +--- name: "llama3-instruct" - config_file: | mmap: true template: diff --git a/gallery/llava.yaml b/gallery/llava.yaml index 44c1aa97..4d07847a 100644 --- a/gallery/llava.yaml +++ b/gallery/llava.yaml @@ -1,6 +1,6 @@ +--- name: "llava" - config_file: | backend: llama-cpp context_size: 4096 diff --git a/gallery/noromaid.yaml b/gallery/noromaid.yaml index 0b9badfe..4772e4ec 100644 --- a/gallery/noromaid.yaml +++ b/gallery/noromaid.yaml @@ -1,3 +1,4 @@ +--- config_file: | mmap: true backend: llama-cpp @@ -50,4 +51,3 @@ config_file: | - - "\n" - "\n\n\n" - diff --git a/gallery/parler-tts.yaml b/gallery/parler-tts.yaml index 76252b1d..98d4614b 100644 --- a/gallery/parler-tts.yaml +++ b/gallery/parler-tts.yaml @@ -1,2 +1,3 @@ +--- config_file: | backend: parler-tts diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml index 3fc84d3b..5e1fb702 100644 --- a/gallery/phi-2-chat.yaml +++ b/gallery/phi-2-chat.yaml @@ -1,6 +1,6 @@ +--- name: "phi-2-chatml" - config_file: | mmap: true template: @@ -16,4 +16,3 @@ config_file: | f16: true stopwords: - <|im_end|> - diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml index 645875ad..89971b4d 100644 --- a/gallery/phi-2-orange.yaml +++ b/gallery/phi-2-orange.yaml @@ -1,3 +1,4 @@ +--- name: "phi-2-orange" config_file: | diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index b17e5bb4..ede4fd0f 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -1,3 +1,4 @@ +--- name: "phi-3-chat" config_file: | @@ -15,4 +16,3 @@ config_file: | f16: true stopwords: - <|end|> - diff --git a/gallery/piper.yaml b/gallery/piper.yaml index eb1a6ecc..c7f40f8c 100644 --- a/gallery/piper.yaml +++ b/gallery/piper.yaml @@ -1,2 +1,3 @@ +--- config_file: | backend: piper diff --git a/gallery/rerankers.yaml b/gallery/rerankers.yaml index dbbad5a0..a4ac48ca 100644 --- a/gallery/rerankers.yaml +++ b/gallery/rerankers.yaml @@ -1,2 +1,3 @@ +--- config_file: | - backend: rerankers \ No newline at end of file + backend: rerankers diff --git a/gallery/sentencetransformers.yaml b/gallery/sentencetransformers.yaml index 9ba5d29b..e8ba7aa0 100644 --- a/gallery/sentencetransformers.yaml +++ b/gallery/sentencetransformers.yaml @@ -1,4 +1,5 @@ +--- name: "sentencetransformers" config_file: | - backend: sentencetransformers \ No newline at end of file + backend: sentencetransformers diff --git 
a/gallery/stablediffusion.yaml b/gallery/stablediffusion.yaml index 9b1cad32..f84de7f2 100644 --- a/gallery/stablediffusion.yaml +++ b/gallery/stablediffusion.yaml @@ -1,3 +1,4 @@ +--- name: "stablediffusion-cpp" config_file: | @@ -7,42 +8,42 @@ config_file: | model: stablediffusion_assets files: -- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" - sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" - sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" -- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" - sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" -- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" -- filename: "stablediffusion_assets/log_sigmas.bin" - sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" -- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" - sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" - sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" -- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" -- filename: "stablediffusion_assets/vocab.txt" - sha256: 
"e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" \ No newline at end of file + - filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" + - filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" + - filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" + - filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" + - filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" + - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" + - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" + - filename: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" + - filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" + - filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" + - filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" + - filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" + - filename: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + uri: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml index 6e39414c..e4a79ad7 100644 --- a/gallery/tinydream.yaml +++ b/gallery/tinydream.yaml @@ -1,3 +1,4 @@ +--- name: "tinydream" config_file: | @@ -33,4 +34,4 @@ files: uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" - filename: "tinydream_assets/vocab.txt" sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" \ No newline at end of file + uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" diff --git a/gallery/vicuna-chat.yaml b/gallery/vicuna-chat.yaml index 9669cce0..05600e66 100644 --- a/gallery/vicuna-chat.yaml +++ b/gallery/vicuna-chat.yaml @@ -1,3 +1,4 @@ +--- name: "vicuna-chat" description: | @@ -18,4 +19,4 @@ config_file: | Complete the following sentence: {{.Input}} chat: | {{.Input}} - ASSISTANT: \ No newline at end of file + ASSISTANT: diff --git a/gallery/virtual.yaml b/gallery/virtual.yaml index 054c3257..22e3e546 100644 --- a/gallery/virtual.yaml +++ b/gallery/virtual.yaml @@ -1,6 +1,7 @@ +--- name: "virtual" description: | A Base model definition -license: "N/A" \ No newline at end of file +license: "N/A" diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index f654a37c..2dc24d6e 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -1,12 +1,12 @@ +--- name: "whisper-base" - config_file: | backend: whisper parameters: model: ggml-whisper-base.bin files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file + - filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" From d98063e80e3bb4685ae681ea443992ba65f8acbc Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:06:22 -0500 Subject: [PATCH 0129/2648] fix: api key polling was not using correct filepath (#2132) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- core/startup/config_file_watcher.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 5f6834d4..800059d0 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path" + "path/filepath" "time" "github.com/fsnotify/fsnotify" @@ -50,6 +51,7 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo } func (c *configFileHandler) callHandler(filename string, handler fileHandler) { + log.Trace().Str("filename", filename).Msg("reading file for dynamic config update") fileContent, err := os.ReadFile(filename) if err != nil && !os.IsNotExist(err) { log.Error().Err(err).Str("filename", filename).Msg("could not read file") @@ -75,7 +77,7 @@ func (c *configFileHandler) Watch() error { <-ticker.C for file, handler := range c.handlers { log.Debug().Str("file", file).Msg("polling config file") - c.callHandler(file, handler) + c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler) } } }() @@ -122,7 +124,8 @@ func (c 
From d98063e80e3bb4685ae681ea443992ba65f8acbc Mon Sep 17 00:00:00 2001
From: cryptk <421501+cryptk@users.noreply.github.com>
Date: Thu, 25 Apr 2024 15:06:22 -0500
Subject: [PATCH 0129/2648] fix: api key polling was not using correct filepath (#2132)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
 core/startup/config_file_watcher.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go
index 5f6834d4..800059d0 100644
--- a/core/startup/config_file_watcher.go
+++ b/core/startup/config_file_watcher.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os"
 	"path"
+	"path/filepath"
 	"time"
 
 	"github.com/fsnotify/fsnotify"
@@ -50,6 +51,7 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo
 }
 
 func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
+	log.Trace().Str("filename", filename).Msg("reading file for dynamic config update")
 	fileContent, err := os.ReadFile(filename)
 	if err != nil && !os.IsNotExist(err) {
 		log.Error().Err(err).Str("filename", filename).Msg("could not read file")
@@ -75,7 +77,7 @@ func (c *configFileHandler) Watch() error {
 			<-ticker.C
 			for file, handler := range c.handlers {
 				log.Debug().Str("file", file).Msg("polling config file")
-				c.callHandler(file, handler)
+				c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler)
 			}
 		}
 	}()
@@ -122,7 +124,8 @@ func (c *configFileHandler) Stop() {
 
 func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
 	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
-		log.Debug().Msg("processing api_keys.json")
+		log.Debug().Msg("processing api keys runtime update")
+		log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
 		if len(fileContent) > 0 {
 			// Parse JSON content from the file
@@ -132,11 +135,14 @@ func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
 			return err
 		}
 
+		log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config file")
+
 		appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
 	} else {
+		log.Trace().Msg("no API keys discovered from dynamic config file")
 		appConfig.ApiKeys = startupAppConfig.ApiKeys
 	}
-	log.Debug().Msg("api keys loaded from api_keys.json")
+	log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
 	return nil
 }
 }
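The one-line fix in #2132 above is easy to miss: handlers are registered under bare filenames, so the polling loop has to join the dynamic-config directory onto each name before reading, otherwise the read resolves against the process working directory. A stripped-down sketch of the corrected pattern follows; all identifiers here are illustrative, not LocalAI's exported API.

// Sketch of the polling pattern fixed in #2132: every read joins the
// watched directory onto the registered bare filename.
package main

import (
	"encoding/json"
	"log"
	"os"
	"path/filepath"
	"time"
)

type fileHandler func(content []byte) error

func poll(dir string, handlers map[string]fileHandler, every time.Duration) {
	ticker := time.NewTicker(every)
	defer ticker.Stop()
	for range ticker.C {
		for name, h := range handlers {
			// The bug class fixed above: reading `name` on its own resolves
			// against the working directory; joining `dir` reads the file
			// the caller actually registered.
			content, err := os.ReadFile(filepath.Join(dir, name))
			if err != nil && !os.IsNotExist(err) {
				log.Printf("could not read %s: %v", name, err)
				continue
			}
			if err := h(content); err != nil {
				log.Printf("handler for %s failed: %v", name, err)
			}
		}
	}
}

func main() {
	startupKeys := []string{"key-from-startup-flags"}
	handlers := map[string]fileHandler{
		// Mirrors readApiKeysJson: keys found in the file are appended to
		// the startup keys, and an empty or missing file falls back to the
		// startup set alone.
		"api_keys.json": func(content []byte) error {
			keys := startupKeys
			if len(content) > 0 {
				var fileKeys []string
				if err := json.Unmarshal(content, &fileKeys); err != nil {
					return err
				}
				keys = append(keys, fileKeys...)
			}
			log.Printf("active api keys: %d", len(keys))
			return nil
		},
	}
	poll("/tmp/localai/dynamic-configs", handlers, 5*time.Second)
}

The merge semantics match readApiKeysJson above: keys from the file extend, rather than replace, the keys passed at startup.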
From 5fceb876c4a786e76ab1dfc42e1fe6c7a95c1eee Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:40:41 +0200
Subject: [PATCH 0130/2648] :arrow_up: Update ggerganov/llama.cpp (#2133)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b017982e..eae8410a 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8
+CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 365ef92530d1d24479535e5a232986ad6a764cf5 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:41:38 +0200
Subject: [PATCH 0131/2648] :arrow_up: Update mudler/go-stable-diffusion (#2134)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index eae8410a..7d64ad03 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
 
 # stablediffusion version
-STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
+STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568
 
 # tinydream version
 TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293

From c8dd8e5ef492900453387a7cbc3da3b508c45715 Mon Sep 17 00:00:00 2001
From: Dave
Date: Thu, 25 Apr 2024 18:47:06 -0400
Subject: [PATCH 0132/2648] fix: reduce chmod permissions for created files and directories (#2137)

quiet more security scanner issues: a first pass of chmod restrictions that removes group and other permissions

Signed-off-by: Dave Lee
---
 core/backend/options.go                      |  2 +-
 core/backend/tts.go                          |  2 +-
 core/http/app.go                             | 10 +++++-----
 core/http/app_test.go                        |  6 +++---
 core/http/endpoints/openai/assistant_test.go | 13 +++++++------
 core/http/endpoints/openai/files_test.go     |  2 +-
 core/startup/startup.go                      |  8 ++++----
 pkg/assets/extract.go                        |  6 +++---
 pkg/downloader/uri.go                        |  2 +-
 pkg/gallery/models.go                        |  8 ++++----
 pkg/gallery/models_test.go                   |  2 +-
 pkg/model/process.go                         |  2 +-
 pkg/templates/cache_test.go                  |  4 ++--
 pkg/utils/config.go                          |  2 +-
 tests/integration/stores_test.go             |  2 +-
 15 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/core/backend/options.go b/core/backend/options.go
index 60cb01ff..bbb9990d 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -109,7 +109,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 	promptCachePath := ""
 	if c.PromptCachePath != "" {
 		p := filepath.Join(modelPath, c.PromptCachePath)
-		os.MkdirAll(filepath.Dir(p), 0755)
+		os.MkdirAll(filepath.Dir(p), 0750)
 		promptCachePath = p
 	}
 
diff --git a/core/backend/tts.go b/core/backend/tts.go
index f97b6202..4532cf00 100644
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -53,7 +53,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
 		return "", nil, fmt.Errorf("could not load piper model")
 	}
 
-	if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil {
+	if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}
 
diff --git a/core/http/app.go b/core/http/app.go
index 93eb0e20..bd740410 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -175,11 +175,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 	}
 
 	// Make sure directories exists
-	os.MkdirAll(appConfig.ImageDir, 0755)
-	os.MkdirAll(appConfig.AudioDir, 0755)
-	os.MkdirAll(appConfig.UploadDir, 0755)
-	os.MkdirAll(appConfig.ConfigsDir, 0755)
-	os.MkdirAll(appConfig.ModelPath, 0755)
+	os.MkdirAll(appConfig.ImageDir, 0750)
+	os.MkdirAll(appConfig.AudioDir, 0750)
+	os.MkdirAll(appConfig.UploadDir, 0750)
+	os.MkdirAll(appConfig.ConfigsDir, 0750)
+	os.MkdirAll(appConfig.ModelPath, 0750)
 
 	// Load config jsons
 	utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
diff --git a/core/http/app_test.go b/core/http/app_test.go
index 3699c0ed..f4728770 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -222,7 +222,7 @@ var _ = Describe("API test", func() {
 			modelDir = filepath.Join(tmpdir, "models")
 			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-			err = os.Mkdir(backendAssetsDir, 0755)
+			err = os.Mkdir(backendAssetsDir, 0750)
 			Expect(err).ToNot(HaveOccurred())
 
 			c, cancel = context.WithCancel(context.Background())
@@ -241,7 +241,7 @@ var _ = Describe("API test", func() {
 			}
 			out, err := yaml.Marshal(g)
 			Expect(err).ToNot(HaveOccurred())
-			err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644)
+			err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0600)
 			Expect(err).ToNot(HaveOccurred())
 
 			galleries := []gallery.Gallery{
@@ -595,7 +595,7 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred())
 			modelDir = filepath.Join(tmpdir, "models")
 			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-			err = os.Mkdir(backendAssetsDir, 0755)
+			err = os.Mkdir(backendAssetsDir, 0750)
 			Expect(err).ToNot(HaveOccurred())
 
 			c, cancel = context.WithCancel(context.Background())
diff --git a/core/http/endpoints/openai/assistant_test.go b/core/http/endpoints/openai/assistant_test.go
index bdc41dda..e7c09033 100644
--- a/core/http/endpoints/openai/assistant_test.go
+++ b/core/http/endpoints/openai/assistant_test.go
@@ -3,10 +3,6 @@ package openai
 import (
 	"encoding/json"
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/gofiber/fiber/v2"
-	"github.com/stretchr/testify/assert"
 	"io"
 	"io/ioutil"
 	"net/http"
@@ -16,6 +12,11 @@ import (
 	"strings"
 	"testing"
 	"time"
+
+	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/stretchr/testify/assert"
 )
 
 var configsDir string = "/tmp/localai/configs"
@@ -49,8 +50,8 @@ func TestAssistantEndpoints(t *testing.T) {
 	}
 
 	_ = os.RemoveAll(appConfig.ConfigsDir)
-	_ = os.MkdirAll(appConfig.ConfigsDir, 0755)
-	_ = os.MkdirAll(modelPath, 0755)
+	_ = os.MkdirAll(appConfig.ConfigsDir, 0750)
+	_ = os.MkdirAll(modelPath, 0750)
 	os.Create(filepath.Join(modelPath, "ggml-gpt4all-j"))
 
 	app := fiber.New(fiber.Config{
diff --git a/core/http/endpoints/openai/files_test.go b/core/http/endpoints/openai/files_test.go
index fc77ae45..2d0be7b9 100644
--- a/core/http/endpoints/openai/files_test.go
+++ b/core/http/endpoints/openai/files_test.go
@@ -251,7 +251,7 @@ func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipar
 
 // Helper to create test files
 func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File {
-	err := os.MkdirAll(option.UploadDir, 0755)
+	err := os.MkdirAll(option.UploadDir, 0750)
 	if err != nil {
 
 		t.Fatalf("Error MKDIR: %v", err)
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 97882a22..b9e95ebf 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -23,24 +23,24 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	if options.ModelPath == "" {
 		return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
 	}
-	err := os.MkdirAll(options.ModelPath, 0755)
+	err := os.MkdirAll(options.ModelPath, 0750)
 	if err != nil {
 		return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
 	}
 	if options.ImageDir != "" {
-		err := os.MkdirAll(options.ImageDir, 0755)
+		err := os.MkdirAll(options.ImageDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
 		}
 	}
 	if options.AudioDir != "" {
-		err := os.MkdirAll(options.AudioDir, 0755)
+		err := os.MkdirAll(options.AudioDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
 		}
 	}
 	if options.UploadDir != "" {
-		err := os.MkdirAll(options.UploadDir, 0755)
+		err := os.MkdirAll(options.UploadDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
 		}
diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go
index fc208377..b795cb30 100644
--- a/pkg/assets/extract.go
+++ b/pkg/assets/extract.go
@@ -10,7 +10,7 @@ import (
 
 func ExtractFiles(content embed.FS, extractDir string) error {
 	// Create the target directory if it doesn't exist
-	err := os.MkdirAll(extractDir, 0755)
+	err := os.MkdirAll(extractDir, 0750)
 	if err != nil {
 		return fmt.Errorf("failed to create directory: %v", err)
 	}
@@ -25,7 +25,7 @@ func ExtractFiles(content embed.FS, extractDir string) error {
 		targetFile := filepath.Join(extractDir, path)
 		if d.IsDir() {
 			// Create the directory in the target directory
-			err := os.MkdirAll(targetFile, 0755)
+			err := os.MkdirAll(targetFile, 0750)
 			if err != nil {
 				return fmt.Errorf("failed to create directory: %v", err)
 			}
@@ -39,7 +39,7 @@ func ExtractFiles(content embed.FS, extractDir string) error {
 		}
 
 		// Create the file in the target directory
-		err = os.WriteFile(targetFile, fileData, 0644)
+		err = os.WriteFile(targetFile, fileData, 0600)
 		if err != nil {
 			return fmt.Errorf("failed to write file: %v", err)
 		}
diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go
index 46ccd6a1..797a264b 100644
--- a/pkg/downloader/uri.go
+++ b/pkg/downloader/uri.go
@@ -184,7 +184,7 @@ func DownloadFile(url string, filePath, sha string, fileN, total int, downloadSt
 	}
 
 	// Create parent directory
-	err = os.MkdirAll(filepath.Dir(filePath), 0755)
+	err = os.MkdirAll(filepath.Dir(filePath), 0750)
 	if err != nil {
 		return fmt.Errorf("failed to create parent directory for file %q: %v", filePath, err)
 	}
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
index 59971bbc..2ab4c832 100644
--- a/pkg/gallery/models.go
+++ b/pkg/gallery/models.go
@@ -92,7 +92,7 @@ func ReadConfigFile(filePath string) (*Config, error) {
 
 func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
 	// Create base path if it doesn't exist
-	err := os.MkdirAll(basePath, 0755)
+	err := os.MkdirAll(basePath, 0750)
 	if err != nil {
 		return fmt.Errorf("failed to create base path: %v", err)
 	}
@@ -125,12 +125,12 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 		filePath := filepath.Join(basePath, template.Name+".tmpl")
 
 		// Create parent directory
-		err := os.MkdirAll(filepath.Dir(filePath), 0755)
+		err := os.MkdirAll(filepath.Dir(filePath), 0750)
 		if err != nil {
 			return fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
 		}
 		// Create and write file content
-		err = os.WriteFile(filePath, []byte(template.Content), 0644)
+		err = os.WriteFile(filePath, []byte(template.Content), 0600)
 		if err != nil {
 			return fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
 		}
@@ -170,7 +170,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 		return fmt.Errorf("failed to marshal updated config YAML: %v", err)
 	}
 
-	err = os.WriteFile(configFilePath, updatedConfigYAML, 0644)
+	err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
 	if err != nil {
 		return fmt.Errorf("failed to write updated config file: %v", err)
 	}
diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go
index 21d3a03d..6eb63128 100644
--- a/pkg/gallery/models_test.go
+++ b/pkg/gallery/models_test.go
@@ -48,7 +48,7 @@ var _ = Describe("Model test", func() {
 			}}
 			out, err := yaml.Marshal(gallery)
 			Expect(err).ToNot(HaveOccurred())
-			err = os.WriteFile(filepath.Join(tempdir, "gallery_simple.yaml"), out, 0644)
+			err = os.WriteFile(filepath.Join(tempdir, "gallery_simple.yaml"), out, 0600)
 			Expect(err).ToNot(HaveOccurred())
 
 			galleries := []Gallery{
diff --git a/pkg/model/process.go b/pkg/model/process.go
index 5f63ee7f..08822fd9 100644
--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@@ -65,7 +65,7 @@ func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
 
 func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error {
 	// Make sure the process is executable
-	if err := os.Chmod(grpcProcess, 0755); err != nil {
+	if err := os.Chmod(grpcProcess, 0700); err != nil {
 		return err
 	}
diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go
index 83af02b2..fc5941ed 100644
--- a/pkg/templates/cache_test.go
+++ b/pkg/templates/cache_test.go
@@ -21,9 +21,9 @@ var _ = Describe("TemplateCache", func() {
 		Expect(err).NotTo(HaveOccurred())
 
 		// Writing example template files
-		err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0644)
+		err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0600)
 		Expect(err).NotTo(HaveOccurred())
-		err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0644)
+		err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0600)
 		Expect(err).NotTo(HaveOccurred())
 
 		templateCache = templates.NewTemplateCache(tempDir)
diff --git a/pkg/utils/config.go b/pkg/utils/config.go
index 929e1f9f..8fd0ec0e 100644
--- a/pkg/utils/config.go
+++ b/pkg/utils/config.go
@@ -15,7 +15,7 @@ func SaveConfig(filePath, fileName string, obj any) {
 	}
 
 	absolutePath := filepath.Join(filePath, fileName)
-	err = os.WriteFile(absolutePath, file, 0644)
+	err = os.WriteFile(absolutePath, file, 0600)
 	if err != nil {
 		log.Error().Err(err).Str("filepath", absolutePath).Msg("failed to save configuration file")
 	}
diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go
index a4ad4f90..54d0844c 100644
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -36,7 +36,7 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
 		tmpdir, err = os.MkdirTemp("", "")
 		Expect(err).ToNot(HaveOccurred())
 		backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-		err = os.Mkdir(backendAssetsDir, 0755)
+		err = os.Mkdir(backendAssetsDir, 0750)
 		Expect(err).ToNot(HaveOccurred())
 
 		err = assets.ExtractFiles(backendAssets, backendAssetsDir)
"sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw==" - }, - "node_modules/@fortaine/fetch-event-source": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz", - "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==", + "node_modules/@langchain/community": { + "version": "0.0.52", + "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.0.52.tgz", + "integrity": "sha512-L+IMAAaLNP7++4HhdvuVJegc8bdw8WP77Jvp98YcySFZTZWH1yasSQSlFn3jgBk+3xLBsudpTZuttKTrZ/TtVQ==", + "dependencies": { + "@langchain/core": "~0.1.60", + "@langchain/openai": "~0.0.28", + "expr-eval": "^2.0.2", + "flat": "^5.0.2", + "langsmith": "~0.1.1", + "uuid": "^9.0.0", + "zod": "^3.22.3", + "zod-to-json-schema": "^3.22.5" + }, "engines": { - "node": ">=16.15" + "node": ">=18" + }, + "peerDependencies": { + "@aws-crypto/sha256-js": "^5.0.0", + "@aws-sdk/client-bedrock-agent-runtime": "^3.485.0", + "@aws-sdk/client-bedrock-runtime": "^3.422.0", + "@aws-sdk/client-dynamodb": "^3.310.0", + "@aws-sdk/client-kendra": "^3.352.0", + "@aws-sdk/client-lambda": "^3.310.0", + "@aws-sdk/client-sagemaker-runtime": "^3.310.0", + "@aws-sdk/client-sfn": "^3.310.0", + "@aws-sdk/credential-provider-node": "^3.388.0", + "@azure/search-documents": "^12.0.0", + "@clickhouse/client": "^0.2.5", + "@cloudflare/ai": "*", + "@datastax/astra-db-ts": "^1.0.0", + "@elastic/elasticsearch": "^8.4.0", + "@getmetal/metal-sdk": "*", + "@getzep/zep-js": "^0.9.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@gradientai/nodejs-sdk": "^1.2.0", + "@huggingface/inference": "^2.6.4", + "@mozilla/readability": "*", + "@neondatabase/serverless": "*", + "@opensearch-project/opensearch": "*", + "@pinecone-database/pinecone": "*", + "@planetscale/database": "^1.8.0", + "@premai/prem-sdk": "^0.3.25", + "@qdrant/js-client-rest": "^1.2.0", + "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", + "@smithy/eventstream-codec": "^2.0.5", + "@smithy/protocol-http": "^3.0.6", + "@smithy/signature-v4": "^2.0.10", + "@smithy/util-utf8": "^2.0.0", + "@supabase/postgrest-js": "^1.1.1", + "@supabase/supabase-js": "^2.10.0", + "@tensorflow-models/universal-sentence-encoder": "*", + "@tensorflow/tfjs-converter": "*", + "@tensorflow/tfjs-core": "*", + "@upstash/redis": "^1.20.6", + "@upstash/vector": "^1.0.7", + "@vercel/kv": "^0.2.3", + "@vercel/postgres": "^0.5.0", + "@writerai/writer-sdk": "^0.40.2", + "@xata.io/client": "^0.28.0", + "@xenova/transformers": "^2.5.4", + "@zilliz/milvus2-sdk-node": ">=2.2.7", + "better-sqlite3": "^9.4.0", + "cassandra-driver": "^4.7.2", + "cborg": "^4.1.1", + "chromadb": "*", + "closevector-common": "0.1.3", + "closevector-node": "0.1.6", + "closevector-web": "0.1.6", + "cohere-ai": "*", + "convex": "^1.3.1", + "couchbase": "^4.3.0", + "discord.js": "^14.14.1", + "dria": "^0.0.3", + "duck-duck-scrape": "^2.2.5", + "faiss-node": "^0.5.1", + "firebase-admin": "^11.9.0 || ^12.0.0", + "google-auth-library": "^8.9.0", + "googleapis": "^126.0.1", + "hnswlib-node": "^3.0.0", + "html-to-text": "^9.0.5", + "interface-datastore": "^8.2.11", + "ioredis": "^5.3.2", + "it-all": "^3.0.4", + "jsdom": "*", + "jsonwebtoken": "^9.0.2", + "llmonitor": "^0.5.9", + "lodash": "^4.17.21", + "lunary": "^0.6.11", + "mongodb": ">=5.2.0", + "mysql2": "^3.3.3", + "neo4j-driver": "*", + "node-llama-cpp": "*", + "pg": 
"^8.11.0", + "pg-copy-streams": "^6.0.5", + "pickleparser": "^0.2.1", + "portkey-ai": "^0.1.11", + "redis": "*", + "replicate": "^0.18.0", + "typeorm": "^0.3.12", + "typesense": "^1.5.3", + "usearch": "^1.1.1", + "vectordb": "^0.1.4", + "voy-search": "0.6.2", + "weaviate-ts-client": "*", + "web-auth-library": "^1.0.3", + "ws": "^8.14.2" + }, + "peerDependenciesMeta": { + "@aws-crypto/sha256-js": { + "optional": true + }, + "@aws-sdk/client-bedrock-agent-runtime": { + "optional": true + }, + "@aws-sdk/client-bedrock-runtime": { + "optional": true + }, + "@aws-sdk/client-dynamodb": { + "optional": true + }, + "@aws-sdk/client-kendra": { + "optional": true + }, + "@aws-sdk/client-lambda": { + "optional": true + }, + "@aws-sdk/client-sagemaker-runtime": { + "optional": true + }, + "@aws-sdk/client-sfn": { + "optional": true + }, + "@aws-sdk/credential-provider-node": { + "optional": true + }, + "@azure/search-documents": { + "optional": true + }, + "@clickhouse/client": { + "optional": true + }, + "@cloudflare/ai": { + "optional": true + }, + "@datastax/astra-db-ts": { + "optional": true + }, + "@elastic/elasticsearch": { + "optional": true + }, + "@getmetal/metal-sdk": { + "optional": true + }, + "@getzep/zep-js": { + "optional": true + }, + "@gomomento/sdk": { + "optional": true + }, + "@gomomento/sdk-core": { + "optional": true + }, + "@google-ai/generativelanguage": { + "optional": true + }, + "@gradientai/nodejs-sdk": { + "optional": true + }, + "@huggingface/inference": { + "optional": true + }, + "@mozilla/readability": { + "optional": true + }, + "@neondatabase/serverless": { + "optional": true + }, + "@opensearch-project/opensearch": { + "optional": true + }, + "@pinecone-database/pinecone": { + "optional": true + }, + "@planetscale/database": { + "optional": true + }, + "@premai/prem-sdk": { + "optional": true + }, + "@qdrant/js-client-rest": { + "optional": true + }, + "@raycast/api": { + "optional": true + }, + "@rockset/client": { + "optional": true + }, + "@smithy/eventstream-codec": { + "optional": true + }, + "@smithy/protocol-http": { + "optional": true + }, + "@smithy/signature-v4": { + "optional": true + }, + "@smithy/util-utf8": { + "optional": true + }, + "@supabase/postgrest-js": { + "optional": true + }, + "@supabase/supabase-js": { + "optional": true + }, + "@tensorflow-models/universal-sentence-encoder": { + "optional": true + }, + "@tensorflow/tfjs-converter": { + "optional": true + }, + "@tensorflow/tfjs-core": { + "optional": true + }, + "@upstash/redis": { + "optional": true + }, + "@upstash/vector": { + "optional": true + }, + "@vercel/kv": { + "optional": true + }, + "@vercel/postgres": { + "optional": true + }, + "@writerai/writer-sdk": { + "optional": true + }, + "@xata.io/client": { + "optional": true + }, + "@xenova/transformers": { + "optional": true + }, + "@zilliz/milvus2-sdk-node": { + "optional": true + }, + "better-sqlite3": { + "optional": true + }, + "cassandra-driver": { + "optional": true + }, + "cborg": { + "optional": true + }, + "chromadb": { + "optional": true + }, + "closevector-common": { + "optional": true + }, + "closevector-node": { + "optional": true + }, + "closevector-web": { + "optional": true + }, + "cohere-ai": { + "optional": true + }, + "convex": { + "optional": true + }, + "couchbase": { + "optional": true + }, + "discord.js": { + "optional": true + }, + "dria": { + "optional": true + }, + "duck-duck-scrape": { + "optional": true + }, + "faiss-node": { + "optional": true + }, + "firebase-admin": { + "optional": true + }, + 
"google-auth-library": { + "optional": true + }, + "googleapis": { + "optional": true + }, + "hnswlib-node": { + "optional": true + }, + "html-to-text": { + "optional": true + }, + "interface-datastore": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "it-all": { + "optional": true + }, + "jsdom": { + "optional": true + }, + "jsonwebtoken": { + "optional": true + }, + "llmonitor": { + "optional": true + }, + "lodash": { + "optional": true + }, + "lunary": { + "optional": true + }, + "mongodb": { + "optional": true + }, + "mysql2": { + "optional": true + }, + "neo4j-driver": { + "optional": true + }, + "node-llama-cpp": { + "optional": true + }, + "pg": { + "optional": true + }, + "pg-copy-streams": { + "optional": true + }, + "pickleparser": { + "optional": true + }, + "portkey-ai": { + "optional": true + }, + "redis": { + "optional": true + }, + "replicate": { + "optional": true + }, + "typeorm": { + "optional": true + }, + "typesense": { + "optional": true + }, + "usearch": { + "optional": true + }, + "vectordb": { + "optional": true + }, + "voy-search": { + "optional": true + }, + "weaviate-ts-client": { + "optional": true + }, + "web-auth-library": { + "optional": true + }, + "ws": { + "optional": true + } + } + }, + "node_modules/@langchain/core": { + "version": "0.1.60", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.1.60.tgz", + "integrity": "sha512-3EJW4ir0tFe17AakpXCgO9flSoDjFELpSQs2w/CMZ5FBlHYxo3ODgVQAZvlHy97khEVgcnvlL3EDhPE7IdNibA==", + "dependencies": { + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.8", + "langsmith": "~0.1.7", + "ml-distance": "^4.0.0", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@langchain/openai": { + "version": "0.0.28", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.0.28.tgz", + "integrity": "sha512-2s1RA3/eAnz4ahdzsMPBna9hfAqpFNlWdHiPxVGZ5yrhXsbLWWoPcF+22LCk9t0HJKtazi2GCIWc0HVXH9Abig==", + "dependencies": { + "@langchain/core": "~0.1.56", + "js-tiktoken": "^1.0.7", + "openai": "^4.32.1", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@langchain/textsplitters": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.0.0.tgz", + "integrity": "sha512-3hPesWomnmVeYMppEGYbyv0v/sRUugUdlFBNn9m1ueJYHAIKbvCErkWxNUH3guyKKYgJVrkvZoQxcd9faucSaw==", + "dependencies": { + "@langchain/core": "~0.1", + "js-tiktoken": "^1.0.11" + }, + "engines": { + "node": ">=18" } }, "node_modules/@sqltools/formatter": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/@sqltools/formatter/-/formatter-1.2.5.tgz", - "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==" + "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==", + "optional": true, + "peer": true }, "node_modules/@types/node": { "version": "18.16.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz", - "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==", - "dev": true + "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==" + }, + "node_modules/@types/node-fetch": { + "version": 
"2.6.11", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz", + "integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } }, "node_modules/@types/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } @@ -77,33 +550,41 @@ "node_modules/any-promise": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==" + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "optional": true, + "peer": true }, "node_modules/app-root-path": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz", "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==", + "optional": true, + "peer": true, "engines": { "node": ">= 6.0.0" } }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "node_modules/axios": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", - "dependencies": { - "follow-redirects": "^1.14.8" - } - }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "optional": true, + "peer": true + }, + "node_modules/base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" }, "node_modules/base64-js": { "version": "1.5.1", @@ -141,15 +622,12 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "optional": true, + "peer": true, "dependencies": { "balanced-match": "^1.0.0" } }, - "node_modules/browser-or-node": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/browser-or-node/-/browser-or-node-2.1.1.tgz", - "integrity": "sha512-8CVjaLJGuSKMVTxJ2DpBl5XnlNDiT4cQFeuCJJrvJmts9YrTZDizTX7PjC2s6W4x+MBGZeEY6dGMrF04/6Hgqg==" - }, "node_modules/buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", @@ -168,15 +646,30 @@ "url": "https://feross.org/support" } ], + "optional": true, + "peer": true, "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "optional": true, + "peer": true, "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -192,6 +685,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -202,10 +697,20 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", + "engines": { + "node": "*" + } + }, "node_modules/cli-highlight": { "version": "2.1.11", "resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz", "integrity": "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg==", + "optional": true, + "peer": true, "dependencies": { "chalk": "^4.0.0", "highlight.js": "^10.7.1", @@ -226,6 +731,8 @@ "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "optional": true, + "peer": true, "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.0", @@ -236,6 +743,8 @@ "version": "16.2.0", "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "optional": 
true, + "peer": true, "dependencies": { "cliui": "^7.0.2", "escalade": "^3.1.1", @@ -253,6 +762,8 @@ "version": "20.2.9", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "optional": true, + "peer": true, "engines": { "node": ">=10" } @@ -261,6 +772,8 @@ "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "optional": true, + "peer": true, "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", @@ -274,6 +787,8 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "optional": true, + "peer": true, "dependencies": { "color-name": "~1.1.4" }, @@ -284,7 +799,9 @@ "node_modules/color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "optional": true, + "peer": true }, "node_modules/combined-stream": { "version": "1.0.8", @@ -297,18 +814,28 @@ "node": ">= 0.8" } }, - "node_modules/cross-fetch": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.5.tgz", - "integrity": "sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==", - "dependencies": { - "node-fetch": "2.6.7" + "node_modules/commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", + "engines": { + "node": ">=14" + } + }, + "node_modules/crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", + "engines": { + "node": "*" } }, "node_modules/debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "optional": true, + "peer": true, "dependencies": { "ms": "2.1.2" }, @@ -321,6 +848,14 @@ } } }, + "node_modules/decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -329,10 +864,21 @@ "node": ">=0.4.0" } }, + "node_modules/digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "dependencies": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "node_modules/dotenv": { "version": "16.0.3", "resolved": 
"https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "optional": true, + "peer": true, "engines": { "node": ">=12" } @@ -340,12 +886,24 @@ "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "optional": true, + "peer": true }, "node_modules/escalade": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "optional": true, + "peer": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", "engines": { "node": ">=6" } @@ -368,25 +926,6 @@ "flat": "cli.js" } }, - "node_modules/follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, "node_modules/form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -400,15 +939,44 @@ "node": ">= 6" } }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "optional": true, + "peer": true }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "integrity": 
"sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "optional": true, + "peer": true, "engines": { "node": "6.* || 8.* || >= 10.*" } @@ -417,6 +985,8 @@ "version": "8.1.0", "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "optional": true, + "peer": true, "dependencies": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -435,6 +1005,8 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } @@ -443,10 +1015,20 @@ "version": "10.7.3", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "optional": true, + "peer": true, "engines": { "node": "*" } }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -464,12 +1046,16 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "optional": true, + "peer": true }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "optional": true, + "peer": true, "dependencies": { "once": "^1.3.0", "wrappy": "1" @@ -478,21 +1064,49 @@ "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "optional": true, + "peer": true }, "node_modules/is-any-array": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "optional": true, + "peer": true, "engines": { "node": ">=8" } }, + "node_modules/js-tiktoken": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.11.tgz", + "integrity": "sha512-PajXFLq2vx7/8jllQZ43vzNpAai/0MOVdJjW/UrNyJorNQRTjHrqdGJG/mjHVy7h9M6dW6CaG43eNLMYFkTh6w==", + "dependencies": { + "base64-js": "^1.5.1" + } + }, + "node_modules/js-yaml": { + "version": "4.1.0", + 
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/jsonpointer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", @@ -502,76 +1116,120 @@ } }, "node_modules/langchain": { - "version": "0.0.67", - "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.67.tgz", - "integrity": "sha512-OO9NEoVYJyNTmrA76rgisA48LkA6Si7qVAS+1hakzKwf/Hj7GhvDe/NpVaWmOFtkAHusJHSbCplbeJKWIgFR2g==", + "version": "0.1.36", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.1.36.tgz", + "integrity": "sha512-NTbnCL/jKWIeEI//Nm1oG8nhW3vkYWvEMr1MPotmTThTfeKfO87eV/OAzAyh6Ruy6GFs/qofRgQZGIe6XvXTNQ==", "dependencies": { - "@anthropic-ai/sdk": "^0.4.3", - "@dqbd/tiktoken": "^1.0.7", - "ansi-styles": "^5.0.0", + "@anthropic-ai/sdk": "^0.9.1", + "@langchain/community": "~0.0.47", + "@langchain/core": "~0.1.60", + "@langchain/openai": "~0.0.28", + "@langchain/textsplitters": "~0.0.0", "binary-extensions": "^2.2.0", - "browser-or-node": "^2.1.1", - "expr-eval": "^2.0.2", - "flat": "^5.0.2", + "js-tiktoken": "^1.0.7", + "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", + "langchainhub": "~0.0.8", + "langsmith": "~0.1.7", "ml-distance": "^4.0.0", - "object-hash": "^3.0.0", - "openai": "^3.2.0", - "p-queue": "^6.6.2", + "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^9.0.0", "yaml": "^2.2.1", - "zod": "^3.21.4", - "zod-to-json-schema": "^3.20.4" + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" }, "engines": { "node": ">=18" }, "peerDependencies": { - "@aws-sdk/client-lambda": "^3.310.0", "@aws-sdk/client-s3": "^3.310.0", - "@getmetal/metal-sdk": "*", - "@huggingface/inference": "^1.5.1", - "@opensearch-project/opensearch": "*", + "@aws-sdk/client-sagemaker-runtime": "^3.310.0", + "@aws-sdk/client-sfn": "^3.310.0", + "@aws-sdk/credential-provider-node": "^3.388.0", + "@azure/storage-blob": "^12.15.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@gomomento/sdk-web": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@google-cloud/storage": "^6.10.1 || ^7.7.0", + "@mendable/firecrawl-js": "^0.0.13", + "@notionhq/client": "^2.2.10", "@pinecone-database/pinecone": "*", "@supabase/supabase-js": "^2.10.0", - "@tensorflow-models/universal-sentence-encoder": "*", - "@tensorflow/tfjs-converter": "*", - "@tensorflow/tfjs-core": "*", - "@zilliz/milvus2-sdk-node": "^2.2.0", + "@vercel/kv": "^0.2.3", + "@xata.io/client": "^0.28.0", + "apify-client": "^2.7.1", + "assemblyai": "^4.0.0", "axios": "*", "cheerio": "^1.0.0-rc.12", - "chromadb": "^1.4.0", - "cohere-ai": "^5.0.2", + "chromadb": "*", + "convex": "^1.3.1", + "couchbase": "^4.3.0", "d3-dsv": "^2.0.0", "epub2": "^3.0.1", - "hnswlib-node": "^1.4.2", + "fast-xml-parser": "*", + "google-auth-library": "^8.9.0", + "handlebars": "^4.7.8", "html-to-text": "^9.0.5", - "mammoth": "*", - "mongodb": "^5.2.0", + "ignore": "^5.2.0", + "ioredis": "^5.3.2", + "jsdom": "*", + "mammoth": "^1.6.0", + "mongodb": ">=5.2.0", + "node-llama-cpp": "*", + "notion-to-md": "^3.1.0", + "officeparser": "^4.0.4", "pdf-parse": "1.1.1", + "peggy": "^3.0.2", "playwright": "^1.32.1", "puppeteer": "^19.7.2", + "pyodide": "^0.24.1", "redis": "^4.6.4", - "replicate": "^0.9.0", - "srt-parser-2": "^1.2.2", + "sonix-speech-recognition": "^2.1.1", + "srt-parser-2": "^1.2.3", 
"typeorm": "^0.3.12", - "weaviate-ts-client": "^1.0.0" + "weaviate-ts-client": "*", + "web-auth-library": "^1.0.3", + "ws": "^8.14.2", + "youtube-transcript": "^1.0.6", + "youtubei.js": "^9.1.0" }, "peerDependenciesMeta": { - "@aws-sdk/client-lambda": { - "optional": true - }, "@aws-sdk/client-s3": { "optional": true }, - "@getmetal/metal-sdk": { + "@aws-sdk/client-sagemaker-runtime": { "optional": true }, - "@huggingface/inference": { + "@aws-sdk/client-sfn": { "optional": true }, - "@opensearch-project/opensearch": { + "@aws-sdk/credential-provider-node": { + "optional": true + }, + "@azure/storage-blob": { + "optional": true + }, + "@gomomento/sdk": { + "optional": true + }, + "@gomomento/sdk-core": { + "optional": true + }, + "@gomomento/sdk-web": { + "optional": true + }, + "@google-ai/generativelanguage": { + "optional": true + }, + "@google-cloud/storage": { + "optional": true + }, + "@mendable/firecrawl-js": { + "optional": true + }, + "@notionhq/client": { "optional": true }, "@pinecone-database/pinecone": { @@ -580,16 +1238,16 @@ "@supabase/supabase-js": { "optional": true }, - "@tensorflow-models/universal-sentence-encoder": { + "@vercel/kv": { "optional": true }, - "@tensorflow/tfjs-converter": { + "@xata.io/client": { "optional": true }, - "@tensorflow/tfjs-core": { + "apify-client": { "optional": true }, - "@zilliz/milvus2-sdk-node": { + "assemblyai": { "optional": true }, "axios": { @@ -601,7 +1259,10 @@ "chromadb": { "optional": true }, - "cohere-ai": { + "convex": { + "optional": true + }, + "couchbase": { "optional": true }, "d3-dsv": { @@ -610,31 +1271,64 @@ "epub2": { "optional": true }, - "hnswlib-node": { + "faiss-node": { + "optional": true + }, + "fast-xml-parser": { + "optional": true + }, + "google-auth-library": { + "optional": true + }, + "handlebars": { "optional": true }, "html-to-text": { "optional": true }, + "ignore": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "jsdom": { + "optional": true + }, "mammoth": { "optional": true }, "mongodb": { "optional": true }, + "node-llama-cpp": { + "optional": true + }, + "notion-to-md": { + "optional": true + }, + "officeparser": { + "optional": true + }, "pdf-parse": { "optional": true }, + "peggy": { + "optional": true + }, "playwright": { "optional": true }, "puppeteer": { "optional": true }, + "pyodide": { + "optional": true + }, "redis": { "optional": true }, - "replicate": { + "sonix-speech-recognition": { "optional": true }, "srt-parser-2": { @@ -645,9 +1339,48 @@ }, "weaviate-ts-client": { "optional": true + }, + "web-auth-library": { + "optional": true + }, + "ws": { + "optional": true + }, + "youtube-transcript": { + "optional": true + }, + "youtubei.js": { + "optional": true } } }, + "node_modules/langchainhub": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.8.tgz", + "integrity": "sha512-Woyb8YDHgqqTOZvWIbm2CaFDGfZ4NTSyXV687AG4vXEfoNo7cGQp7nhl7wL3ehenKWmNEmcxCLgOZzW8jE6lOQ==" + }, + "node_modules/langsmith": { + "version": "0.1.18", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.1.18.tgz", + "integrity": "sha512-LHk0aIFAl3/iiKvUzAiM8Xdm13bRO70XERQeHCF99fL2X815Jc47nxu6m7usSuQC8sw6rirCKZbGm18cqdUEzA==", + "dependencies": { + "@types/uuid": "^9.0.1", + "commander": "^10.0.1", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0" + } + }, + "node_modules/md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": 
"sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "dependencies": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -671,6 +1404,8 @@ "version": "5.1.6", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "optional": true, + "peer": true, "dependencies": { "brace-expansion": "^2.0.1" }, @@ -682,6 +1417,8 @@ "version": "2.1.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-2.1.6.tgz", "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==", + "optional": true, + "peer": true, "bin": { "mkdirp": "dist/cjs/src/bin.js" }, @@ -737,20 +1474,48 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", + "bin": { + "mustache": "bin/mustache" + } + }, "node_modules/mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "optional": true, + "peer": true, "dependencies": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, "node_modules/node-fetch": { - "version": "2.6.7", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "dependencies": { "whatwg-url": "^5.0.0" }, @@ -781,35 +1546,45 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "optional": true, + "peer": true, "engines": { "node": ">=0.10.0" } }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "engines": { - "node": ">= 6" - } - }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": 
"sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "optional": true, + "peer": true, "dependencies": { "wrappy": "1" } }, "node_modules/openai": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", - "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", + "version": "4.38.5", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.38.5.tgz", + "integrity": "sha512-Ym5GJL98ZhLJJ7enBx53jjG3vwN/fsB+Ozh46nnRZZS9W1NiYqbwkJ+sXd3dkCIiWIgcyyOPL2Zr8SQAzbpj3g==", "dependencies": { - "axios": "^0.26.0", - "form-data": "^4.0.0" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" + }, + "bin": { + "openai": "bin/cli" } }, + "node_modules/openapi-types": { + "version": "12.1.3", + "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", + "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" + }, "node_modules/p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", @@ -859,12 +1634,16 @@ "node_modules/parse5": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz", - "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==" + "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==", + "optional": true, + "peer": true }, "node_modules/parse5-htmlparser2-tree-adapter": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", "integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", + "optional": true, + "peer": true, "dependencies": { "parse5": "^6.0.1" } @@ -872,17 +1651,23 @@ "node_modules/parse5-htmlparser2-tree-adapter/node_modules/parse5": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==", + "optional": true, + "peer": true }, "node_modules/reflect-metadata": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.13.tgz", - "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==" + "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==", + "optional": true, + "peer": true }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "optional": true, + "peer": true, "engines": { "node": ">=0.10.0" } @@ -912,12 +1697,16 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "optional": true, + "peer": true }, "node_modules/sha.js": { "version": "2.4.11", "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", 
"integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "optional": true, + "peer": true, "dependencies": { "inherits": "^2.0.1", "safe-buffer": "^5.0.1" @@ -930,6 +1719,8 @@ "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "optional": true, + "peer": true, "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -943,6 +1734,8 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "optional": true, + "peer": true, "dependencies": { "ansi-regex": "^5.0.1" }, @@ -954,6 +1747,8 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "optional": true, + "peer": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -965,6 +1760,8 @@ "version": "3.3.1", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "optional": true, + "peer": true, "dependencies": { "any-promise": "^1.0.0" } @@ -973,6 +1770,8 @@ "version": "1.6.0", "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "optional": true, + "peer": true, "dependencies": { "thenify": ">= 3.1.0 < 4" }, @@ -988,12 +1787,16 @@ "node_modules/tslib": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "optional": true, + "peer": true }, "node_modules/typeorm": { "version": "0.3.15", "resolved": "https://registry.npmjs.org/typeorm/-/typeorm-0.3.15.tgz", "integrity": "sha512-R4JSw8QjDP1W+ypeRz/XrCXIqubrLSnNAzJAp9EQSQIPHTv+YmUHZis8g08lOwFpuhqL9m8jkPSz8GWEKlU/ow==", + "optional": true, + "peer": true, "dependencies": { "@sqltools/formatter": "^1.2.5", "app-root-path": "^3.1.0", @@ -1115,6 +1918,14 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "engines": { + "node": ">= 8" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -1133,6 +1944,8 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "optional": true, + "peer": true, "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -1149,6 +1962,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": 
"sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -1162,12 +1977,16 @@ "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "optional": true, + "peer": true }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "optional": true, + "peer": true, "engines": { "node": ">=10" } @@ -1184,6 +2003,8 @@ "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "optional": true, + "peer": true, "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", @@ -1201,67 +2022,154 @@ "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "optional": true, + "peer": true, "engines": { "node": ">=12" } }, "node_modules/zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", + "version": "3.23.4", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.4.tgz", + "integrity": "sha512-/AtWOKbBgjzEYYQRNfoGKHObgfAZag6qUJX1VbHo2PRBgS+wfWagEY2mizjfyAPcGesrJOcx/wcl0L9WnVrHFw==", "funding": { "url": "https://github.com/sponsors/colinhacks" } }, "node_modules/zod-to-json-schema": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.0.tgz", - "integrity": "sha512-+KyFCzqKwE6CxMSZxEUBaGmdXzB09BoFebO+xef/ISE4cTfReQlyThYbS8aqd3uWkdt9fz5BGHsY0CbY+Ra9oA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", "peerDependencies": { - "zod": "^3.21.4" + "zod": "^3.23.3" } } }, "dependencies": { "@anthropic-ai/sdk": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.4.3.tgz", - "integrity": "sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==", + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.9.1.tgz", + "integrity": "sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==", "requires": { - "@fortaine/fetch-event-source": "^3.0.6", - "cross-fetch": "^3.1.5" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" } }, - "@dqbd/tiktoken": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz", - "integrity": 
"sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw==" + "@langchain/community": { + "version": "0.0.52", + "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.0.52.tgz", + "integrity": "sha512-L+IMAAaLNP7++4HhdvuVJegc8bdw8WP77Jvp98YcySFZTZWH1yasSQSlFn3jgBk+3xLBsudpTZuttKTrZ/TtVQ==", + "requires": { + "@langchain/core": "0.1.5", + "@langchain/openai": "~0.0.28", + "expr-eval": "^2.0.2", + "flat": "^5.0.2", + "langsmith": "~0.1.1", + "uuid": "^9.0.0", + "zod": "^3.22.3", + "zod-to-json-schema": "^3.22.5" + } }, - "@fortaine/fetch-event-source": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz", - "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==" + "@langchain/core": { + "version": "0.1.60", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.1.60.tgz", + "integrity": "sha512-3EJW4ir0tFe17AakpXCgO9flSoDjFELpSQs2w/CMZ5FBlHYxo3ODgVQAZvlHy97khEVgcnvlL3EDhPE7IdNibA==", + "requires": { + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.8", + "langsmith": "~0.1.7", + "ml-distance": "^4.0.0", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "@langchain/openai": { + "version": "0.0.28", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.0.28.tgz", + "integrity": "sha512-2s1RA3/eAnz4ahdzsMPBna9hfAqpFNlWdHiPxVGZ5yrhXsbLWWoPcF+22LCk9t0HJKtazi2GCIWc0HVXH9Abig==", + "requires": { + "@langchain/core": "0.1.5", + "js-tiktoken": "^1.0.7", + "openai": "^4.32.1", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "@langchain/textsplitters": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.0.0.tgz", + "integrity": "sha512-3hPesWomnmVeYMppEGYbyv0v/sRUugUdlFBNn9m1ueJYHAIKbvCErkWxNUH3guyKKYgJVrkvZoQxcd9faucSaw==", + "requires": { + "@langchain/core": "~0.1", + "js-tiktoken": "^1.0.11" + } }, "@sqltools/formatter": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/@sqltools/formatter/-/formatter-1.2.5.tgz", - "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==" + "integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==", + "optional": true, + "peer": true }, "@types/node": { "version": "18.16.4", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz", - "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==", - "dev": true + "integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==" + }, + "@types/node-fetch": { + "version": "2.6.11", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz", + "integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==", + "requires": { + "@types/node": "*", + "form-data": "^4.0.0" + } }, "@types/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" }, + "@types/uuid": { + "version": "9.0.8", + "resolved": 
"https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==" + }, + "abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "requires": { + "event-target-shim": "^5.0.0" + } + }, + "agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "requires": { + "humanize-ms": "^1.2.1" + } + }, "ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==" + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "optional": true, + "peer": true }, "ansi-styles": { "version": "5.2.0", @@ -1271,30 +2179,38 @@ "any-promise": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==" + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "optional": true, + "peer": true }, "app-root-path": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/app-root-path/-/app-root-path-3.1.0.tgz", - "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==" + "integrity": "sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA==", + "optional": true, + "peer": true + }, + "argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "axios": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", - "requires": { - "follow-redirects": "^1.14.8" - } - }, "balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "optional": true, + "peer": true + }, + "base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" }, "base64-js": { "version": "1.5.1", @@ -1315,28 +2231,34 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": 
"sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "optional": true, + "peer": true, "requires": { "balanced-match": "^1.0.0" } }, - "browser-or-node": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/browser-or-node/-/browser-or-node-2.1.1.tgz", - "integrity": "sha512-8CVjaLJGuSKMVTxJ2DpBl5XnlNDiT4cQFeuCJJrvJmts9YrTZDizTX7PjC2s6W4x+MBGZeEY6dGMrF04/6Hgqg==" - }, "buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "optional": true, + "peer": true, "requires": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, + "camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==" + }, "chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "optional": true, + "peer": true, "requires": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -1346,16 +2268,25 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "requires": { "color-convert": "^2.0.1" } } } }, + "charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==" + }, "cli-highlight": { "version": "2.1.11", "resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz", "integrity": "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg==", + "optional": true, + "peer": true, "requires": { "chalk": "^4.0.0", "highlight.js": "^10.7.1", @@ -1369,6 +2300,8 @@ "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "optional": true, + "peer": true, "requires": { "string-width": "^4.2.0", "strip-ansi": "^6.0.0", @@ -1379,6 +2312,8 @@ "version": "16.2.0", "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "optional": true, + "peer": true, "requires": { "cliui": "^7.0.2", "escalade": "^3.1.1", @@ -1392,7 +2327,9 @@ "yargs-parser": { "version": "20.2.9", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==" + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "optional": true, + "peer": true } } }, @@ -1400,6 +2337,8 @@ "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "optional": true, + "peer": true, "requires": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", @@ -1410,6 +2349,8 @@ 
"version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "optional": true, + "peer": true, "requires": { "color-name": "~1.1.4" } @@ -1417,7 +2358,9 @@ "color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "optional": true, + "peer": true }, "combined-stream": { "version": "1.0.8", @@ -1427,41 +2370,70 @@ "delayed-stream": "~1.0.0" } }, - "cross-fetch": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.5.tgz", - "integrity": "sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==", - "requires": { - "node-fetch": "2.6.7" - } + "commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==" + }, + "crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==" }, "debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "optional": true, + "peer": true, "requires": { "ms": "2.1.2" } }, + "decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==" + }, "delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" }, + "digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "requires": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "dotenv": { "version": "16.0.3", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", - "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==" + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "optional": true, + "peer": true }, "emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "optional": true, + "peer": true }, "escalade": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" + "integrity": 
"sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "optional": true, + "peer": true + }, + "event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==" }, "eventemitter3": { "version": "4.0.7", @@ -1478,11 +2450,6 @@ "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==" }, - "follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==" - }, "form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -1493,20 +2460,47 @@ "mime-types": "^2.1.12" } }, + "form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "requires": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "dependencies": { + "web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==" + } + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "optional": true, + "peer": true }, "get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "optional": true, + "peer": true }, "glob": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "optional": true, + "peer": true, "requires": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -1518,22 +2512,38 @@ "has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==" + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "optional": true, + "peer": true }, "highlight.js": { "version": "10.7.3", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", - 
"integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==" + "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "optional": true, + "peer": true + }, + "humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "requires": { + "ms": "^2.0.0" + } }, "ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", - "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==" + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "optional": true, + "peer": true }, "inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "optional": true, + "peer": true, "requires": { "once": "^1.3.0", "wrappy": "1" @@ -1542,17 +2552,42 @@ "inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "optional": true, + "peer": true }, "is-any-array": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" }, + "is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "optional": true, + "peer": true + }, + "js-tiktoken": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.11.tgz", + "integrity": "sha512-PajXFLq2vx7/8jllQZ43vzNpAai/0MOVdJjW/UrNyJorNQRTjHrqdGJG/mjHVy7h9M6dW6CaG43eNLMYFkTh6w==", + "requires": { + "base64-js": "^1.5.1" + } + }, + "js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "requires": { + "argparse": "^2.0.1" + } }, "jsonpointer": { "version": "5.0.1", @@ -1560,27 +2595,55 @@ "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==" }, "langchain": { - "version": "0.0.67", - "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.67.tgz", - "integrity": "sha512-OO9NEoVYJyNTmrA76rgisA48LkA6Si7qVAS+1hakzKwf/Hj7GhvDe/NpVaWmOFtkAHusJHSbCplbeJKWIgFR2g==", + "version": "0.1.36", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.1.36.tgz", + 
"integrity": "sha512-NTbnCL/jKWIeEI//Nm1oG8nhW3vkYWvEMr1MPotmTThTfeKfO87eV/OAzAyh6Ruy6GFs/qofRgQZGIe6XvXTNQ==", "requires": { - "@anthropic-ai/sdk": "^0.4.3", - "@dqbd/tiktoken": "^1.0.7", - "ansi-styles": "^5.0.0", + "@anthropic-ai/sdk": "^0.9.1", + "@langchain/community": "~0.0.47", + "@langchain/core": "0.1.5", + "@langchain/openai": "~0.0.28", + "@langchain/textsplitters": "~0.0.0", "binary-extensions": "^2.2.0", - "browser-or-node": "^2.1.1", - "expr-eval": "^2.0.2", - "flat": "^5.0.2", + "js-tiktoken": "^1.0.7", + "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", + "langchainhub": "~0.0.8", + "langsmith": "~0.1.7", "ml-distance": "^4.0.0", - "object-hash": "^3.0.0", - "openai": "^3.2.0", - "p-queue": "^6.6.2", + "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^9.0.0", "yaml": "^2.2.1", - "zod": "^3.21.4", - "zod-to-json-schema": "^3.20.4" + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + } + }, + "langchainhub": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.8.tgz", + "integrity": "sha512-Woyb8YDHgqqTOZvWIbm2CaFDGfZ4NTSyXV687AG4vXEfoNo7cGQp7nhl7wL3ehenKWmNEmcxCLgOZzW8jE6lOQ==" + }, + "langsmith": { + "version": "0.1.18", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.1.18.tgz", + "integrity": "sha512-LHk0aIFAl3/iiKvUzAiM8Xdm13bRO70XERQeHCF99fL2X815Jc47nxu6m7usSuQC8sw6rirCKZbGm18cqdUEzA==", + "requires": { + "@types/uuid": "^9.0.1", + "commander": "^10.0.1", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^9.0.0" + } + }, + "md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "requires": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" } }, "mime-db": { @@ -1600,6 +2663,8 @@ "version": "5.1.6", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "optional": true, + "peer": true, "requires": { "brace-expansion": "^2.0.1" } @@ -1607,7 +2672,9 @@ "mkdirp": { "version": "2.1.6", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-2.1.6.tgz", - "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==" + "integrity": "sha512-+hEnITedc8LAtIP9u3HJDFIdcLV2vXP33sqLLIzkv1Db1zO/1OxbvYf0Y1OC/S/Qo5dxHXepofhmxL02PsKe+A==", + "optional": true, + "peer": true }, "ml-array-mean": { "version": "1.1.6", @@ -1654,20 +2721,32 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==" + }, "mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "optional": true, + "peer": true, "requires": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": 
"sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==" + }, "node-fetch": { - "version": "2.6.7", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "requires": { "whatwg-url": "^5.0.0" } @@ -1680,30 +2759,40 @@ "object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==" - }, - "object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==" + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "optional": true, + "peer": true }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "optional": true, + "peer": true, "requires": { "wrappy": "1" } }, "openai": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", - "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", + "version": "4.38.5", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.38.5.tgz", + "integrity": "sha512-Ym5GJL98ZhLJJ7enBx53jjG3vwN/fsB+Ozh46nnRZZS9W1NiYqbwkJ+sXd3dkCIiWIgcyyOPL2Zr8SQAzbpj3g==", "requires": { - "axios": "^0.26.0", - "form-data": "^4.0.0" + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" } }, + "openapi-types": { + "version": "12.1.3", + "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", + "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" + }, "p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", @@ -1738,12 +2827,16 @@ "parse5": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz", - "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==" + "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==", + "optional": true, + "peer": true }, "parse5-htmlparser2-tree-adapter": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", "integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", + "optional": true, + "peer": true, "requires": { "parse5": "^6.0.1" }, @@ -1751,19 +2844,25 @@ "parse5": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": 
"sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==", + "optional": true, + "peer": true } } }, "reflect-metadata": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.13.tgz", - "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==" + "integrity": "sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg==", + "optional": true, + "peer": true }, "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==" + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "optional": true, + "peer": true }, "retry": { "version": "0.13.1", @@ -1773,12 +2872,16 @@ "safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "optional": true, + "peer": true }, "sha.js": { "version": "2.4.11", "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "optional": true, + "peer": true, "requires": { "inherits": "^2.0.1", "safe-buffer": "^5.0.1" @@ -1788,6 +2891,8 @@ "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "optional": true, + "peer": true, "requires": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -1798,6 +2903,8 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "optional": true, + "peer": true, "requires": { "ansi-regex": "^5.0.1" } @@ -1806,6 +2913,8 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "optional": true, + "peer": true, "requires": { "has-flag": "^4.0.0" } @@ -1814,6 +2923,8 @@ "version": "3.3.1", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "optional": true, + "peer": true, "requires": { "any-promise": "^1.0.0" } @@ -1822,6 +2933,8 @@ "version": "1.6.0", "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "optional": true, + "peer": true, "requires": { "thenify": ">= 3.1.0 < 4" } @@ -1834,12 +2947,16 @@ "tslib": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": 
"sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "optional": true, + "peer": true }, "typeorm": { "version": "0.3.15", "resolved": "https://registry.npmjs.org/typeorm/-/typeorm-0.3.15.tgz", "integrity": "sha512-R4JSw8QjDP1W+ypeRz/XrCXIqubrLSnNAzJAp9EQSQIPHTv+YmUHZis8g08lOwFpuhqL9m8jkPSz8GWEKlU/ow==", + "optional": true, + "peer": true, "requires": { "@sqltools/formatter": "^1.2.5", "app-root-path": "^3.1.0", @@ -1868,6 +2985,11 @@ "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==" }, + "web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -1886,6 +3008,8 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "optional": true, + "peer": true, "requires": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -1896,6 +3020,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "optional": true, + "peer": true, "requires": { "color-convert": "^2.0.1" } @@ -1905,12 +3031,16 @@ "wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "optional": true, + "peer": true }, "y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "optional": true, + "peer": true }, "yaml": { "version": "2.2.2", @@ -1921,6 +3051,8 @@ "version": "17.7.2", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "optional": true, + "peer": true, "requires": { "cliui": "^8.0.1", "escalade": "^3.1.1", @@ -1934,17 +3066,19 @@ "yargs-parser": { "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==" + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "optional": true, + "peer": true }, "zod": { - "version": "3.21.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", - "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==" + "version": "3.23.4", + 
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.4.tgz", + "integrity": "sha512-/AtWOKbBgjzEYYQRNfoGKHObgfAZag6qUJX1VbHo2PRBgS+wfWagEY2mizjfyAPcGesrJOcx/wcl0L9WnVrHFw==" }, "zod-to-json-schema": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.0.tgz", - "integrity": "sha512-+KyFCzqKwE6CxMSZxEUBaGmdXzB09BoFebO+xef/ISE4cTfReQlyThYbS8aqd3uWkdt9fz5BGHsY0CbY+Ra9oA==", + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", "requires": {} } } diff --git a/examples/langchain/langchainjs-localai-example/package.json b/examples/langchain/langchainjs-localai-example/package.json index a6023c03..1d65575d 100644 --- a/examples/langchain/langchainjs-localai-example/package.json +++ b/examples/langchain/langchainjs-localai-example/package.json @@ -1,6 +1,6 @@ { "name": "langchainjs-localai-example", - "version": "0.1.0", + "version": "0.1.1", "description": "Trivial Example of using langchain + the OpenAI API + LocalAI together", "main": "index.mjs", "scripts": { @@ -15,7 +15,11 @@ "typescript": "^5.0.4" }, "dependencies": { - "langchain": "^0.0.67", - "typeorm": "^0.3.15" + "@langchain/community": "^0.0.52", + "@langchain/openai": "^0.0.28", + "langchain": "^0.1.36" + }, + "overrides": { + "@langchain/core": "0.1.5" } } diff --git a/examples/langchain/langchainjs-localai-example/src/index.mts b/examples/langchain/langchainjs-localai-example/src/index.mts index 11faa384..995c2832 100644 --- a/examples/langchain/langchainjs-localai-example/src/index.mts +++ b/examples/langchain/langchainjs-localai-example/src/index.mts @@ -1,15 +1,17 @@ -import { OpenAIChat } from "langchain/llms/openai"; import { loadQAStuffChain } from "langchain/chains"; import { Document } from "langchain/document"; -import { initializeAgentExecutorWithOptions } from "langchain/agents"; -import {Calculator} from "langchain/tools/calculator"; +import { pull } from "langchain/hub"; +import { AgentExecutor, createOpenAIToolsAgent } from "langchain/agents"; +import {Calculator} from "@langchain/community/tools/calculator"; +import { ChatOpenAI } from "@langchain/openai"; +import type { ChatPromptTemplate } from "@langchain/core/prompts"; const pathToLocalAI = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1'; const fakeApiKey = process.env['OPENAI_API_KEY'] || '-'; const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo'; -function getModel(): OpenAIChat { - return new OpenAIChat({ +function getModel(): ChatOpenAI { + return new ChatOpenAI({ prefixMessages: [ { role: "system", @@ -29,8 +31,8 @@ function getModel(): OpenAIChat { // Minimal example. export const run = async () => { const model = getModel(); - console.log(`about to model.call at ${new Date().toUTCString()}`); - const res = await model.call( + console.log(`about to model.invoke at ${new Date().toUTCString()}`); + const res = await model.invoke( "What would be a good company name a company that makes colorful socks?" ); console.log(`${new Date().toUTCString()}`); @@ -47,7 +49,7 @@ export const run2 = async () => { new Document({ pageContent: "Harrison went to Harvard." }), new Document({ pageContent: "Ankush went to Princeton." 
}),
   ];
-  const resA = await chainA.call({
+  const resA = await chainA.invoke({
     input_documents: docs,
     question: "Where did Harrison go to college?",
   });
@@ -58,22 +60,33 @@ await run2();
 
 // Quickly thrown together example of using tools + agents.
 // This seems like it should work, but it doesn't yet.
-export const temporarilyBrokenToolTest = async () => {
+export const toolAgentTest = async () => {
   const model = getModel();
-  const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
-    agentType: "zero-shot-react-description",
+  const prompt = await pull<ChatPromptTemplate>("hwchase17/openai-tools-agent");
+
+  const tools = [new Calculator()];
+
+  const agent = await createOpenAIToolsAgent({
+    llm: model,
+    tools: tools,
+    prompt: prompt
   });
 
   console.log("Loaded agent.");
 
+  const agentExecutor = new AgentExecutor({
+    agent,
+    tools,
+  });
+
   const input = `What is the value of (500 *2) + 350 - 13?`;
 
   console.log(`Executing with input "${input}"...`);
 
-  const result = await executor.call({ input });
+  const result = await agentExecutor.invoke({ input });
 
   console.log(`Got output ${result.output}`);
 }
 
-await temporarilyBrokenToolTest();
+await toolAgentTest();
diff --git a/examples/langchain/langchainjs-localai-example/tsconfig.json b/examples/langchain/langchainjs-localai-example/tsconfig.json
index 84129d26..5e0d5a58 100644
--- a/examples/langchain/langchainjs-localai-example/tsconfig.json
+++ b/examples/langchain/langchainjs-localai-example/tsconfig.json
@@ -8,7 +8,8 @@
     "esModuleInterop": true,
     "allowSyntheticDefaultImports": true,
     "isolatedModules": true,
-    "outDir": "./dist"
+    "outDir": "./dist",
+    "skipLibCheck": true
   },
   "include": ["src", "test"],
   "exclude": ["node_modules", "dist"]

From 6b411ae2129e7520c0ea03d0685d3eeb788003cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 26 Apr 2024 00:48:06 +0200
Subject: [PATCH 0134/2648] models(gallery): add variants of llama3 70b (#2138)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 56e434c5..9cc72d79 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -82,6 +82,24 @@
       - filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
         sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72
         uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
+- <<: *llama3
+  name: "llama3-70b-instruct:IQ1_M"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+  files:
+    - filename: Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+      sha256: cdbe8ac2126a70fa0af3fac7a4fe04f1c76330c50eba8383567587b48b328098
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_M.gguf
+- <<: *llama3
+  name: "llama3-70b-instruct:IQ1_S"
+  overrides:
+    parameters:
+      model: Meta-Llama-3-70B-Instruct.IQ1_S.gguf
+  files:
+    - filename: Meta-Llama-3-70B-Instruct.IQ1_S.gguf
+      sha256: 3797a69f1bdf53fabf9f3a3a8c89730b504dd3209406288515c9944c14093048
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.IQ1_S.gguf
 - <<: *llama3
   name: "llama-3-sauerkrautlm-8b-instruct"
   urls:

From 44bc540bb5da107c38613290d23736bf59da13bd Mon Sep 17 00:00:00 2001
From: Dave
Date: Fri, 26 Apr 2024 04:33:12 -0400
Subject: [PATCH 0135/2648] fix: security scanner dislikes `runCommand` function arguments (#2140)

runCommand ==> ffmpegCommand.
No functional changes, but makes it clear to the security scanner and future developers that this function cannot run arbitrary commands

Signed-off-by: Dave Lee
---
 backend/go/transcribe/transcript.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go
index fdfaa974..74833e4d 100644
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -11,8 +11,8 @@ import (
 	"github.com/go-skynet/LocalAI/core/schema"
 )
 
-func runCommand(command []string) (string, error) {
-	cmd := exec.Command(command[0], command[1:]...)
+func ffmpegCommand(args []string) (string, error) {
+	cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
 	cmd.Env = os.Environ()
 	out, err := cmd.CombinedOutput()
 	return string(out), err
@@ -21,8 +21,8 @@ func runCommand(command []string) (string, error) {
 // AudioToWav converts audio to wav for transcribe.
 // TODO: use https://github.com/mccoyst/ogg?
 func audioToWav(src, dst string) error {
-	command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
-	out, err := runCommand(command)
+	commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
+	out, err := ffmpegCommand(commandArgs)
 	if err != nil {
 		return fmt.Errorf("error: %w out: %s", err, out)
 	}
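The pattern applied above — a fixed, compile-time binary name in exec.Command, with only the arguments left variable — is what lets a taint-tracking scanner (for example gosec's G204 check) prove that no caller-controlled value can choose which executable runs. A minimal standalone sketch of the same idea, assuming only the Go standard library (the ffmpegVersion helper and its flag are illustrative, not taken from the patch):

package main

import (
	"fmt"
	"os/exec"
)

// ffmpegVersion always executes the constant binary name "ffmpeg";
// callers can influence the arguments but never which program runs.
func ffmpegVersion() (string, error) {
	cmd := exec.Command("ffmpeg", "-version") // argv[0] is a string literal, so it cannot be tainted
	out, err := cmd.CombinedOutput()
	return string(out), err
}

func main() {
	out, err := ffmpegVersion()
	if err != nil {
		fmt.Println("ffmpeg not available:", err)
		return
	}
	fmt.Print(out)
}

Contrast this with the removed runCommand(command []string), where command[0] flowed in from the caller, so the scanner had to assume the executable could be anything.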
From 2cd4936c997187c7422ba36167f33323b5cf19f7 Mon Sep 17 00:00:00 2001
From: Dave
Date: Fri, 26 Apr 2024 04:34:31 -0400
Subject: [PATCH 0136/2648] fix: security scanner warning noise: error handlers part 1 (#2141)

first group of error handlers to reduce security scanner warning noise level

Signed-off-by: Dave Lee
---
 core/backend/options.go        | 11 ++++++++---
 core/startup/startup.go        |  5 ++++-
 embedded/embedded.go           |  6 +++++-
 examples/semantic-todo/main.go |  5 ++++-
 main.go                        |  6 +++++-
 pkg/assets/list.go             |  7 ++++++-
 pkg/grpc/server.go             |  4 ++--
 7 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/core/backend/options.go b/core/backend/options.go
index bbb9990d..4a7435e6 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -7,7 +7,8 @@ import (
 
 	"github.com/go-skynet/LocalAI/core/config"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
 )
 
 func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
@@ -109,8 +110,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
 	promptCachePath := ""
 	if c.PromptCachePath != "" {
 		p := filepath.Join(modelPath, c.PromptCachePath)
-		os.MkdirAll(filepath.Dir(p), 0750)
-		promptCachePath = p
+		err := os.MkdirAll(filepath.Dir(p), 0750)
+		if err == nil {
+			promptCachePath = p
+		} else {
+			log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder")
+		}
 	}
 
 	return &pb.PredictOptions{
diff --git a/core/startup/startup.go b/core/startup/startup.go
index b9e95ebf..17bbf9f5 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -122,7 +122,10 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	// Watch the configuration directory
 	// If the directory does not exist, we don't watch it
 	configHandler := newConfigFileHandler(options)
-	configHandler.Watch()
+	err = configHandler.Watch()
+	if err != nil {
+		log.Error().Err(err).Msg("error establishing configuration directory watcher")
+	}
 
 	log.Info().Msg("core/startup process completed!")
 	return cl, ml, options, nil
diff --git a/embedded/embedded.go b/embedded/embedded.go
index c779fc26..438a1352 100644
--- a/embedded/embedded.go
+++ b/embedded/embedded.go
@@ -7,6 +7,7 @@ import (
 	"strings"
 
 	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/rs/zerolog/log"
 
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"gopkg.in/yaml.v3"
@@ -29,7 +30,10 @@ func ModelShortURL(s string) string {
 }
 
 func init() {
-	yaml.Unmarshal(modelLibrary, &modelShorteners)
+	err := yaml.Unmarshal(modelLibrary, &modelShorteners)
+	if err != nil {
+		log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary")
+	}
 }
 
 func GetRemoteLibraryShorteners(url string) (map[string]string, error) {
diff --git a/examples/semantic-todo/main.go b/examples/semantic-todo/main.go
index 371fe6b9..a8936ea1 100644
--- a/examples/semantic-todo/main.go
+++ b/examples/semantic-todo/main.go
@@ -239,7 +239,10 @@ func (app *App) updateUI() {
 			task := Task{Description: inputField.GetText()}
 			app.tasks = append(app.tasks, task)
 			app.state = StateRoot
-			postTasksToExternalService([]Task{task})
+			err := postTasksToExternalService([]Task{task})
+			if err != nil {
+				panic(err)
+			}
 		}
 		app.updateUI()
 	})
diff --git a/main.go b/main.go
index 04f13d3f..8fb50184 100644
--- a/main.go
+++ b/main.go
@@ -43,7 +43,11 @@ func main() {
 	for _, envFile := range envFiles {
 		if _, err := os.Stat(envFile); err == nil {
 			log.Info().Str("envFile", envFile).Msg("loading environment variables from file")
-			godotenv.Load(envFile)
+			err = godotenv.Load(envFile)
+			if err != nil {
+				log.Error().Err(err).Str("envFile", envFile).Msg("failed to load environment variables from file")
+				continue
+			}
 		}
 	}
 
diff --git a/pkg/assets/list.go b/pkg/assets/list.go
index 7b705b49..47e60a40 100644
--- a/pkg/assets/list.go
+++ b/pkg/assets/list.go
@@ -3,10 +3,12 @@ package assets
 import (
 	"embed"
 	"io/fs"
+
+	"github.com/rs/zerolog/log"
 )
 
 func ListFiles(content embed.FS) (files []string) {
-	fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
+	err := fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
 			return err
 		}
@@ -18,5 +20,8 @@ func ListFiles(content embed.FS) (files []string) {
 		files = append(files, path)
 		return nil
 	})
+	if err != nil {
+		log.Error().Err(err).Msg("error walking the embedded filesystem")
+	}
 	return
 }
diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go
index 8116241f..8e9f4d68 100644
--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -131,10 +131,10 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS
 		done <- true
 	}()
 
-	s.llm.PredictStream(in, resultChan)
+	err := s.llm.PredictStream(in, resultChan)
 	<-done
 
-	return nil
+	return err
 }
 
 func (s *server) TokenizeString(ctx context.Context, in *pb.PredictOptions) (*pb.TokenizationResponse, error) {
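The patch above mixes two deliberate error-handling strategies: best-effort setup steps (the config watcher, optional .env files, the embedded model library) log the failure and keep going, while PredictStream now propagates the backend error so the gRPC handler reports a real status instead of silently returning nil. A compact sketch contrasting the two strategies, assuming nothing beyond the standard library (function names here are illustrative, not from the patch):

package main

import (
	"errors"
	"log"
)

// logAndContinue is for optional setup: the failure is recorded,
// but the program keeps running without the feature.
func logAndContinue(what string, setup func() error) {
	if err := setup(); err != nil {
		log.Printf("%s unavailable, continuing: %v", what, err)
	}
}

// propagate is for request handlers: the error becomes the caller's
// (e.g. a gRPC stream's) terminal status rather than being dropped.
func propagate(work func() error) error {
	return work()
}

func main() {
	logAndContinue("config watcher", func() error { return errors.New("directory missing") })
	if err := propagate(func() error { return nil }); err != nil {
		log.Fatal(err)
	}
	log.Println("request handled")
}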
log.Error().Err(err).Msg("error establishing configuration directory watcher") + } log.Info().Msg("core/startup process completed!") return cl, ml, options, nil diff --git a/embedded/embedded.go b/embedded/embedded.go index c779fc26..438a1352 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/rs/zerolog/log" "github.com/go-skynet/LocalAI/pkg/assets" "gopkg.in/yaml.v3" @@ -29,7 +30,10 @@ func ModelShortURL(s string) string { } func init() { - yaml.Unmarshal(modelLibrary, &modelShorteners) + err := yaml.Unmarshal(modelLibrary, &modelShorteners) + if err != nil { + log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary") + } } func GetRemoteLibraryShorteners(url string) (map[string]string, error) { diff --git a/examples/semantic-todo/main.go b/examples/semantic-todo/main.go index 371fe6b9..a8936ea1 100644 --- a/examples/semantic-todo/main.go +++ b/examples/semantic-todo/main.go @@ -239,7 +239,10 @@ func (app *App) updateUI() { task := Task{Description: inputField.GetText()} app.tasks = append(app.tasks, task) app.state = StateRoot - postTasksToExternalService([]Task{task}) + err := postTasksToExternalService([]Task{task}) + if err != nil { + panic(err) + } } app.updateUI() }) diff --git a/main.go b/main.go index 04f13d3f..8fb50184 100644 --- a/main.go +++ b/main.go @@ -43,7 +43,11 @@ func main() { for _, envFile := range envFiles { if _, err := os.Stat(envFile); err == nil { log.Info().Str("envFile", envFile).Msg("loading environment variables from file") - godotenv.Load(envFile) + err = godotenv.Load(envFile) + if err != nil { + log.Error().Err(err).Str("envFile", envFile).Msg("failed to load environment variables from file") + continue + } } } diff --git a/pkg/assets/list.go b/pkg/assets/list.go index 7b705b49..47e60a40 100644 --- a/pkg/assets/list.go +++ b/pkg/assets/list.go @@ -3,10 +3,12 @@ package assets import ( "embed" "io/fs" + + "github.com/rs/zerolog/log" ) func ListFiles(content embed.FS) (files []string) { - fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { + err := fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -18,5 +20,8 @@ func ListFiles(content embed.FS) (files []string) { files = append(files, path) return nil }) + if err != nil { + log.Error().Err(err).Msg("error walking the embedded filesystem") + } return } diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go index 8116241f..8e9f4d68 100644 --- a/pkg/grpc/server.go +++ b/pkg/grpc/server.go @@ -131,10 +131,10 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS done <- true }() - s.llm.PredictStream(in, resultChan) + err := s.llm.PredictStream(in, resultChan) <-done - return nil + return err } func (s *server) TokenizeString(ctx context.Context, in *pb.PredictOptions) (*pb.TokenizationResponse, error) { From 006306b183e006c35f109b3d5cb7fc059b9b4229 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Apr 2024 04:34:50 -0400 Subject: [PATCH 0137/2648] fix: use bluemonday as recommended by blackfriday (#2142) use bluemonday as recommended by blackfriday Signed-off-by: Dave Lee --- core/http/render.go | 3 ++- go.mod | 10 +++++----- go.sum | 10 ++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/core/http/render.go b/core/http/render.go index 8f1b36c6..1becf0b2 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -10,6 +10,7 @@ import ( 
"github.com/go-skynet/LocalAI/core/schema" "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" + "github.com/microcosm-cc/bluemonday" "github.com/russross/blackfriday" ) @@ -39,5 +40,5 @@ func renderEngine() *fiberhtml.Engine { func markDowner(args ...interface{}) template.HTML { s := blackfriday.MarkdownCommon([]byte(fmt.Sprintf("%s", args...))) - return template.HTML(s) + return template.HTML(bluemonday.UGCPolicy().Sanitize(string(s))) } diff --git a/go.mod b/go.mod index 9485383e..15846cd4 100644 --- a/go.mod +++ b/go.mod @@ -93,7 +93,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/gorilla/css v1.0.0 // indirect + github.com/gorilla/css v1.0.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/klauspost/pgzip v1.2.5 // indirect @@ -133,9 +133,9 @@ require ( github.com/yuin/goldmark-emoji v1.0.2 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.22.0 // indirect golang.org/x/mod v0.16.0 // indirect - golang.org/x/term v0.18.0 // indirect + golang.org/x/term v0.19.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect @@ -161,8 +161,8 @@ require ( github.com/rivo/uniseg v0.2.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect - golang.org/x/net v0.22.0 // indirect - golang.org/x/sys v0.18.0 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/sys v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.19.0 // indirect ) diff --git a/go.sum b/go.sum index b68834b2..1ca56a72 100644 --- a/go.sum +++ b/go.sum @@ -146,6 +146,8 @@ github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= +github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= +github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= @@ -377,6 +379,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod 
h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -395,6 +399,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -431,12 +437,16 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From c9451cb604d9325fadf678e1e511d7e8e5367e8a Mon Sep 17 00:00:00 2001 From: fakezeta Date: Fri, 26 Apr 2024 16:20:43 +0200 Subject: [PATCH 0138/2648] Bump oneapi-basekit, optimum and openvino (#2139) * Bump oneapi-basekit, optimum and openvino * Changed PERFORMANCE HINT to CUMULATIVE_THROUGHPUT Minor latency change for first token but about 10-15% speedup on token generation. 
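
For illustration, a minimal standalone sketch of what the new setting amounts to
when loading a model with optimum-intel (the keyword arguments mirror the
transformers_server.py change below; the model id and device here are
placeholders, not values from this repository):

```python
# Hypothetical sketch, not LocalAI backend code: load an OpenVINO-exported
# model with the performance hint this patch switches to.
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    "some-org/some-openvino-model",  # placeholder model id, for illustration only
    compile=True,                    # compile the OpenVINO model at load time
    ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},  # previously "LATENCY"
    device="CPU",                    # the backend passes its device_map here
)
```

As noted above, the hint trades a small amount of first-token latency for
higher sustained token-generation throughput.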
--- .github/workflows/image-pr.yml | 4 ++-- .github/workflows/image.yml | 12 ++++++------ Makefile | 4 ++-- .../python/common-env/transformers/transformers.yml | 7 ++++--- backend/python/transformers/transformers_server.py | 2 +- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 9c4fece7..3df5cd5a 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -68,7 +68,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg' ffmpeg: 'true' @@ -110,7 +110,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 255c1c65..43b7052e 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -148,7 +148,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg' ffmpeg: 'true' @@ -161,7 +161,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg' ffmpeg: 'true' @@ -175,7 +175,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-core' ffmpeg: 'false' @@ -185,7 +185,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' @@ -195,7 +195,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' @@ -205,7 +205,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04" grpc-base-image: "ubuntu:22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' diff --git a/Makefile b/Makefile index 7d64ad03..3ebe13f3 100644 --- a/Makefile +++ b/Makefile @@ -707,7 +707,7 @@ docker-aio-all: docker-image-intel: docker build \ - --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ @@ -715,7 +715,7 @@ docker-image-intel: docker-image-intel-xpu: 
docker build \ - --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 5c069dd0..5f4e85b9 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -60,9 +60,10 @@ dependencies: - networkx - numpy==1.26.0 - onnx==1.15.0 - - openvino==2024.0.0 - - openvino-telemetry==2023.2.1 - - optimum[openvino]==1.17.1 + - openvino==2024.1.0 + - openvino-telemetry==2024.1.0 + - optimum[openvino]==1.19.1 + - optimum-intel==1.16.1 - packaging==23.2 - pandas - peft==0.5.0 diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 2f4140c2..a27c24da 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -150,7 +150,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "LATENCY"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, device=device_map) self.OV = True else: From 2dc1fa247424ad962cdddbc1d440ae7c56c32095 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Apr 2024 12:46:35 -0400 Subject: [PATCH 0139/2648] fix: `config_file_watcher.go` - root all file reads for safety (#2144) callHandler() now has all file access rooted within DynamicConfigDir Signed-off-by: Dave Lee --- core/startup/config_file_watcher.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 800059d0..6bbb367f 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -21,7 +21,6 @@ type configFileHandler struct { watcher *fsnotify.Watcher - configDir string appConfig *config.ApplicationConfig } @@ -30,7 +29,6 @@ type configFileHandler struct { func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler { c := configFileHandler{ handlers: make(map[string]fileHandler), - configDir: appConfig.DynamicConfigsDir, appConfig: appConfig, } c.Register("api_keys.json", readApiKeysJson(*appConfig), true) @@ -45,16 +43,17 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo } c.handlers[filename] = handler if runNow { - c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler) + c.callHandler(filename, handler) } return nil } func (c *configFileHandler) callHandler(filename string, handler fileHandler) { - log.Trace().Str("filename", filename).Msg("reading file for dynamic config update") - fileContent, err := os.ReadFile(filename) + rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename)) + log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update") + fileContent, err := os.ReadFile(rootedFilePath) if err != nil && !os.IsNotExist(err) { - log.Error().Err(err).Str("filename", filename).Msg("could not read file") + log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file") } if err = handler(fileContent, c.appConfig); err != nil { @@ -66,7 +65,8 @@ 
func (c *configFileHandler) Watch() error { configWatcher, err := fsnotify.NewWatcher() c.watcher = configWatcher if err != nil { - log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory") + log.Fatal().Err(err).Str("configdir", c.appConfig.DynamicConfigsDir).Msg("unable to create a watcher for configuration directory") + } if c.appConfig.DynamicConfigsDirPollInterval > 0 { @@ -77,7 +77,7 @@ func (c *configFileHandler) Watch() error { <-ticker.C for file, handler := range c.handlers { log.Debug().Str("file", file).Msg("polling config file") - c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler) + c.callHandler(file, handler) } } }() @@ -97,7 +97,7 @@ func (c *configFileHandler) Watch() error { continue } - c.callHandler(event.Name, handler) + c.callHandler(filepath.Base(event.Name), handler) } case err, ok := <-c.watcher.Errors: log.Error().Err(err).Msg("config watcher error received") From 56d843c263ce72409baefe2201130737c052239d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 01:03:10 +0200 Subject: [PATCH 0140/2648] :arrow_up: Update docs version mudler/LocalAI (#2149) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6a618115..ad22e2aa 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.12.4" + "version": " 🖼️ v2.13.0 - Model gallery edition" } From 030d55599562fbe2586760e93eb47fe58631e60c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 04:18:28 +0200 Subject: [PATCH 0141/2648] :arrow_up: Update ggerganov/llama.cpp (#2150) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ebe13f3..15aea6ce 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72 +CPPLLAMA_VERSION?=928e0b7013c862cf10701957b3d654aa70f11bd8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From fb2a05ff43b6ce70835c9fd29eccb0fa76ac8da5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 09:08:33 +0200 Subject: [PATCH 0142/2648] feat(gallery): display job status also during navigation (#2151) * feat(gallery): keep showing progress also when refreshing Signed-off-by: Ettore Di Giacinto * fix(intel-gpu): better defaults Signed-off-by: Ettore Di Giacinto * feat: make it thread-safe Signed-off-by: mudler --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: mudler --- core/config/backend_config.go | 8 +++- core/http/elements/gallery.go | 19 ++++++++- core/http/routes/ui.go | 30 +++++++++++--- pkg/xsync/map.go | 77 +++++++++++++++++++++++++++++++++++ pkg/xsync/map_test.go | 26 ++++++++++++ pkg/xsync/sync_suite_test.go | 13 ++++++ 6 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 pkg/xsync/map.go create mode 100644 pkg/xsync/map_test.go create mode 100644 pkg/xsync/sync_suite_test.go diff 
--git a/core/config/backend_config.go b/core/config/backend_config.go index 64182e75..35e0776d 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -238,7 +238,13 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { if cfg.MMap == nil { // MMap is enabled by default - cfg.MMap = &trueV + + // Only exception is for Intel GPUs + if os.Getenv("XPU") != "" { + cfg.MMap = &falseV + } else { + cfg.MMap = &trueV + } } if cfg.MMlock == nil { diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index c03750da..6edbd23d 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -6,6 +6,7 @@ import ( "github.com/chasefleming/elem-go" "github.com/chasefleming/elem-go/attrs" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/xsync" ) const ( @@ -102,7 +103,8 @@ func cardSpan(text, icon string) elem.Node { ) } -func ListModels(models []*gallery.GalleryModel) string { +func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string { + //StartProgressBar(uid, "0") modelsElements := []elem.Node{} span := func(s string) elem.Node { return elem.Span( @@ -118,6 +120,7 @@ func ListModels(models []*gallery.GalleryModel) string { "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", // post the Model ID as param "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name), }, @@ -152,6 +155,9 @@ func ListModels(models []*gallery.GalleryModel) string { } actionDiv := func(m *gallery.GalleryModel) elem.Node { + galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) + currentlyInstalling := installing.Exists(galleryID) + nodes := []elem.Node{ cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), } @@ -193,7 +199,16 @@ func ListModels(models []*gallery.GalleryModel) string { }, nodes..., ), - elem.If(m.Installed, span("Installed"), installButton(m)), + elem.If( + currentlyInstalling, + elem.Node( // If currently installing, show progress bar + elem.Raw(StartProgressBar(installing.Get(galleryID), "0")), + ), // Otherwise, show install button (if not installed) or display "Installed" + elem.If(m.Installed, + span("Installed"), + installButton(m), + ), + ), ) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index c64ec5ff..b63b1870 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -10,6 +10,8 @@ import ( "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/xsync" + "github.com/gofiber/fiber/v2" "github.com/google/uuid" ) @@ -21,13 +23,16 @@ func RegisterUIRoutes(app *fiber.App, galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { - // Show the Models page + // keeps the state of models that are being installed from the UI + var installingModels = xsync.NewSyncedMap[string, string]() + + // Show the Models page (all models) app.Get("/browse", auth, func(c 
*fiber.Ctx) error { models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) summary := fiber.Map{ "Title": "LocalAI - Models", - "Models": template.HTML(elements.ListModels(models)), + "Models": template.HTML(elements.ListModels(models, installingModels)), "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, } @@ -36,7 +41,7 @@ func RegisterUIRoutes(app *fiber.App, return c.Render("views/models", summary) }) - // HTMX: return the model details + // Show the models, filtered from the user input // https://htmx.org/examples/active-search/ app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { form := struct { @@ -58,12 +63,13 @@ func RegisterUIRoutes(app *fiber.App, } } - return c.SendString(elements.ListModels(filteredModels)) + return c.SendString(elements.ListModels(filteredModels, installingModels)) }) + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service // https://htmx.org/examples/progress-bar/ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // strings.Clone is required! + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! id, err := uuid.NewUUID() if err != nil { @@ -72,6 +78,8 @@ func RegisterUIRoutes(app *fiber.App, uid := id.String() + installingModels.Set(galleryID, uid) + op := gallery.GalleryOp{ Id: uid, GalleryName: galleryID, @@ -84,6 +92,8 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.StartProgressBar(uid, "0")) }) + // Display the job current progress status + // If the job is done, we trigger the /browse/job/:uid route // https://htmx.org/examples/progress-bar/ app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { jobUID := c.Params("uid") @@ -95,7 +105,7 @@ func RegisterUIRoutes(app *fiber.App, } if status.Progress == 100 { - c.Set("HX-Trigger", "done") + c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) return c.SendString(elements.ProgressBar("100")) } if status.Error != nil { @@ -105,7 +115,15 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) }) + // this route is hit when the job is done, and we display the + // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + for _, k := range installingModels.Keys() { + if installingModels.Get(k) == c.Params("uid") { + installingModels.Delete(k) + } + } + return c.SendString(elements.DoneProgress(c.Params("uid"))) }) } diff --git a/pkg/xsync/map.go b/pkg/xsync/map.go new file mode 100644 index 00000000..9c3a471e --- /dev/null +++ b/pkg/xsync/map.go @@ -0,0 +1,77 @@ +package xsync + +import ( + "sync" +) + +type SyncedMap[K comparable, V any] struct { + mu sync.RWMutex + m map[K]V +} + +func NewSyncedMap[K comparable, V any]() *SyncedMap[K, V] { + return &SyncedMap[K, V]{ + m: make(map[K]V), + } +} + +func (m *SyncedMap[K, V]) Get(key K) V { + m.mu.RLock() + defer m.mu.RUnlock() + return m.m[key] +} + +func (m *SyncedMap[K, V]) Keys() []K { + m.mu.RLock() + defer m.mu.RUnlock() + keys := make([]K, 0, len(m.m)) + for k := range m.m { + keys = append(keys, k) + } + return keys +} + +func (m *SyncedMap[K, V]) Values() []V { + m.mu.RLock() + defer m.mu.RUnlock() + values := make([]V, 0, len(m.m)) + for _, v := range m.m { + values = append(values, v) + } + return values +} + +func (m 
*SyncedMap[K, V]) Len() int { + m.mu.RLock() + defer m.mu.RUnlock() + return len(m.m) +} + +func (m *SyncedMap[K, V]) Iterate(f func(key K, value V) bool) { + m.mu.RLock() + defer m.mu.RUnlock() + for k, v := range m.m { + if !f(k, v) { + break + } + } +} + +func (m *SyncedMap[K, V]) Set(key K, value V) { + m.mu.Lock() + m.m[key] = value + m.mu.Unlock() +} + +func (m *SyncedMap[K, V]) Delete(key K) { + m.mu.Lock() + delete(m.m, key) + m.mu.Unlock() +} + +func (m *SyncedMap[K, V]) Exists(key K) bool { + m.mu.RLock() + defer m.mu.RUnlock() + _, ok := m.m[key] + return ok +} diff --git a/pkg/xsync/map_test.go b/pkg/xsync/map_test.go new file mode 100644 index 00000000..a7ecfbcc --- /dev/null +++ b/pkg/xsync/map_test.go @@ -0,0 +1,26 @@ +package xsync_test + +import ( + . "github.com/go-skynet/LocalAI/pkg/xsync" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("SyncMap", func() { + + Context("Syncmap", func() { + It("sets and gets", func() { + m := NewSyncedMap[string, string]() + m.Set("foo", "bar") + Expect(m.Get("foo")).To(Equal("bar")) + }) + It("deletes", func() { + m := NewSyncedMap[string, string]() + m.Set("foo", "bar") + m.Delete("foo") + Expect(m.Get("foo")).To(Equal("")) + Expect(m.Exists("foo")).To(Equal(false)) + }) + }) +}) diff --git a/pkg/xsync/sync_suite_test.go b/pkg/xsync/sync_suite_test.go new file mode 100644 index 00000000..0dad9c66 --- /dev/null +++ b/pkg/xsync/sync_suite_test.go @@ -0,0 +1,13 @@ +package xsync_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestSync(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI sync test") +} From 4c97406f2b3f10b68165caa7a4dfe09f01dc2cf0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 12:30:15 +0200 Subject: [PATCH 0143/2648] models(gallery): add Einstein v6.1 (#2152) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 9cc72d79..cf4e332a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -31,6 +31,29 @@ - python ## LLMs ### START LLAMA3 +- name: "einstein-v6.1-llama3-8b" + url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/5s12oq859qLfDkkTNam_C.png + urls: + - https://huggingface.co/Weyaxi/Einstein-v6.1-Llama3-8B + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + license: llama3 + description: | + This model is a full fine-tuned version of meta-llama/Meta-Llama-3-8B on diverse datasets. + + This model is finetuned using 8xRTX3090 + 1xRTXA6000 using axolotl. 
+ overrides: + parameters: + model: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf + files: + - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf + sha256: 3ef96fd6e32658774b3c8fbc24088787dfa911288e272b186f448c886400d30d + uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" name: "llama3-8b-instruct" From 935f4c23f64c79d21bf44c5821877bbc382d811d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 17:53:56 +0200 Subject: [PATCH 0144/2648] models(gallery): add SOVL (#2154) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cf4e332a..2162ba52 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -236,6 +236,22 @@ - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893 uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "sovl_llama3_8b-gguf-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/N_1D87adbMuMlSIQ5rI3_.png + description: | + I'm not gonna tell you this is the best model anyone has ever made. I'm not going to tell you that you will love chatting with SOVL. + + What I am gonna say is thank you for taking the time out of your day. Without users like you, my work would be meaningless. + overrides: + parameters: + model: SOVL_Llama3_8B-Q4_K_M-imat.gguf + files: + - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf + sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 + uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: From 1f8461767d0b728cbee1805ebeffa0b59d58f6a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 17:59:31 +0200 Subject: [PATCH 0145/2648] models(gallery): add average_normie (#2155) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2162ba52..4d26030c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -252,6 +252,34 @@ - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "average_normie_l3_v1_8b-gguf-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/dvNIj1rSTjBvgs3XJfqXK.png + description: | + A model by an average normie for the average normie. + + This model is a stock merge of the following models: + + https://huggingface.co/cgato/L3-TheSpice-8b-v0.1.3 + + https://huggingface.co/Sao10K/L3-Solana-8B-v1 + + https://huggingface.co/ResplendentAI/Kei_Llama3_8B + + The final merge then had the following LoRA applied over it: + + https://huggingface.co/ResplendentAI/Theory_of_Mind_Llama3 + + This should be an intelligent and adept roleplaying model. 
+ overrides: + parameters: + model: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf + files: + - filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf + sha256: 9e98cd2672f716a0872912fdc4877969efd14d6f682f28e156f8591591c00d9c + uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf - <<: *llama3 name: "llama-3-unholy-8b:Q8_0" urls: From 164be58445066b8756e7a0eca96290fa2a63fc42 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 18:10:58 +0200 Subject: [PATCH 0146/2648] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 343a7cf5..7fe1a598 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855 Hot topics (looking for contributors): + +- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 From 9fc013599118dc49dd77b0993864f14d6dcb7836 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sat, 27 Apr 2024 12:48:20 -0500 Subject: [PATCH 0147/2648] feat: cleanup Dockerfile and make final image a little smaller (#2146) * feat: cleanup Dockerfile and make final image a little smaller Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: add build-essential to final stage Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: more GRPC cache misses Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: correct for another cause of GRPC cache misses Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: generate new GRPC cache automatically if needed Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * fix: use new GRPC_MAKEFLAGS build arg in GRPC cache generation Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 8 +- .github/workflows/image_build.yml | 9 +- Dockerfile | 144 ++++++++++++++------- 3 files changed, 112 insertions(+), 49 deletions(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index c6b080b5..deda6084 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -1,7 +1,10 @@ name: 'generate and publish GRPC docker caches' on: -- workflow_dispatch + workflow_dispatch: + push: + branches: + - master concurrency: group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }} @@ -80,11 +83,12 @@ jobs: # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} - MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . 
file: ./Dockerfile cache-to: type=gha,ignore-error=true + cache-from: type=gha target: grpc platforms: ${{ matrix.platforms }} push: false \ No newline at end of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index b06100ff..fb1985fd 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -210,7 +210,7 @@ jobs: # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. build-args: | GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} - MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.58.0 context: . file: ./Dockerfile @@ -225,6 +225,10 @@ jobs: uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded build-args: | BUILD_TYPE=${{ inputs.build-type }} CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} @@ -232,6 +236,9 @@ jobs: FFMPEG=${{ inputs.ffmpeg }} IMAGE_TYPE=${{ inputs.image-type }} BASE_IMAGE=${{ inputs.base-image }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.58.0 MAKEFLAGS=${{ inputs.makeflags }} context: . file: ./Dockerfile diff --git a/Dockerfile b/Dockerfile index 4d12cb56..717b3a3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,22 +21,22 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + python3-pip \ + unzip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH $PATH:/usr/local/go/bin +ENV PATH $PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers -ENV PATH $PATH:/root/go/bin RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest -# Install protobuf (the version in 22.04 is too old) -RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ - unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ - rm protoc.zip - # Install grpcio-tools (the version in 22.04 is too old) RUN pip install --user grpcio-tools @@ -49,12 +49,21 @@ RUN echo "Target Variant: $TARGETVARIANT" # CuBLAS requirements RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ - apt-get install -y software-properties-common && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - rm -f cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} 
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + software-properties-common && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + rm -f cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi # Cuda @@ -64,10 +73,12 @@ ENV PATH /usr/local/cuda/bin:${PATH} ENV PATH /opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion -RUN apt-get install -y \ - libopenblas-dev \ - libopencv-dev \ - && apt-get clean +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libopenblas-dev \ + libopencv-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Set up OpenCV RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 @@ -82,24 +93,37 @@ RUN test -n "$TARGETARCH" \ FROM requirements-core AS requirements-extras -RUN apt install -y gpg && \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \ apt-get update && \ - apt-get install -y conda && apt-get clean + apt-get install -y --no-install-recommends \ + conda && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* ENV PATH="/root/.cargo/bin:${PATH}" -RUN apt-get install -y python3-pip && apt-get clean -RUN pip install --upgrade pip +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + pip install --upgrade pip RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -RUN apt-get install -y espeak-ng espeak && apt-get clean +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + espeak-ng \ + espeak && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* RUN if [ ! -e /usr/bin/python ]; then \ - ln -s /usr/bin/python3 /usr/bin/python \ + ln -s /usr/bin/python3 /usr/bin/python \ ; fi ################################### @@ -107,15 +131,20 @@ RUN if [ ! 
-e /usr/bin/python ]; then \ FROM ${GRPC_BASE_IMAGE} AS grpc -ARG MAKEFLAGS +# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI +ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_VERSION=v1.58.0 -ENV MAKEFLAGS=${MAKEFLAGS} +ENV MAKEFLAGS=${GRPC_MAKEFLAGS} WORKDIR /build RUN apt-get update && \ - apt-get install -y build-essential cmake git && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -123,8 +152,12 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall WORKDIR /build/grpc/cmake/build -RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \ - make +# We install GRPC to a different prefix here so that we can copy in only the build artifacts later +# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree +# and running make install in the target container +RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ + make && \ + make install ################################### ################################### @@ -149,7 +182,10 @@ COPY .git . RUN echo "GO_TAGS: $GO_TAGS" RUN apt-get update && \ - apt-get install -y build-essential cmake git && \ + apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -157,26 +193,33 @@ RUN make prepare # If we are building with clblas support, we need the libraries for the builds RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y libclblast-dev && \ - apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + libclblast-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi +# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below +# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only +# here so that we can generate the grpc code for the stablediffusion build +RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + # stablediffusion does not tolerate a newer version of abseil, build it first RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build -COPY --from=grpc /build/grpc ./grpc/ - -WORKDIR /build/grpc/cmake/build -RUN make install +# Install the pre-built GRPC +COPY --from=grpc /opt/grpc /usr/local # Rebuild with defaults backends WORKDIR /build RUN make build RUN if [ ! 
-d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ - mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \ - touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \ + mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \ + touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \ ; fi ################################### @@ -203,18 +246,27 @@ ENV PIP_CACHE_PURGE=true # Add FFmpeg RUN if [ "${FFMPEG}" = "true" ]; then \ - apt-get install -y ffmpeg && apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + ffmpeg && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi # Add OpenCL RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y libclblast1 && \ - apt-get clean \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + libclblast1 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ ; fi RUN apt-get update && \ - apt-get install -y cmake git && \ + apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -227,9 +279,9 @@ WORKDIR /build COPY . . COPY --from=builder /build/sources ./sources/ -COPY --from=grpc /build/grpc ./grpc/ +COPY --from=grpc /opt/grpc /usr/local -RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc +RUN make prepare-sources # Copy the binary COPY --from=builder /build/local-ai ./ From 7e6bf6e7a177848df28e5e0cdfb39b94a43c8c4b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Apr 2024 19:52:26 +0200 Subject: [PATCH 0148/2648] ci: add auto-label rule for gallery in labeler.yml Signed-off-by: Ettore Di Giacinto --- .github/labeler.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 64a88f43..687a90d1 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -8,6 +8,11 @@ kind/documentation: - changed-files: - any-glob-to-any-file: '*.md' +area/ai-model: +- any: + - changed-files: + - any-glob-to-any-file: 'gallery/*' + examples: - any: - changed-files: @@ -16,4 +21,4 @@ examples: ci: - any: - changed-files: - - any-glob-to-any-file: '.github/*' \ No newline at end of file + - any-glob-to-any-file: '.github/*' From c3982212f9946ab3a1d92e1444bea871febdae0c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Apr 2024 23:32:43 +0200 Subject: [PATCH 0149/2648] :arrow_up: Update ggerganov/llama.cpp (#2159) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15aea6ce..6ef6e9ab 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=928e0b7013c862cf10701957b3d654aa70f11bd8 +CPPLLAMA_VERSION?=4dba7e8114d84241c842b986e008af8b88d1a019 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 773d5d23d59c7215e5bf55df06ee937652ae5d9b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 11:57:22 +0200 Subject: [PATCH 0150/2648] models(gallery): add solana (#2157) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 
4d26030c..2ba1558f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -252,6 +252,23 @@ - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686 uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf +- <<: *llama3 + name: "l3-solana-8b-v1-gguf" + url: "github:mudler/LocalAI/gallery/solana.yaml@master" + license: cc-by-nc-4.0 + urls: + - https://huggingface.co/Sao10K/L3-Solana-8B-v1-GGUF + description: | + A Full Fine-Tune of meta-llama/Meta-Llama-3-8B done with 2x A100 80GB on ~75M Tokens worth of Instruct, and Multi-Turn complex conversations, of up to 8192 tokens long sequence lengths. + + Trained as a generalist instruct model that should be able to handle certain unsavoury topics. It could roleplay too, as a side bonus. + overrides: + parameters: + model: L3-Solana-8B-v1.q5_K_M.gguf + files: + - filename: L3-Solana-8B-v1.q5_K_M.gguf + sha256: 9b8cd2c3beaab5e4f82efd10e7d44f099ad40a4e0ee286ca9fce02c8eec26d2f + uri: huggingface://Sao10K/L3-Solana-8B-v1-GGUF/L3-Solana-8B-v1.q5_K_M.gguf - <<: *llama3 name: "average_normie_l3_v1_8b-gguf-iq-imatrix" urls: From 0f0ae13ad05d60312dc6ff72bf53f05d548d6daf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:01:01 +0200 Subject: [PATCH 0151/2648] models(gallery): add poppy porpoise (#2158) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2ba1558f..fdea1d9e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -495,6 +495,32 @@ - filename: llava-v1.5-7b-mmproj-Q8_0.gguf sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf +- <<: *llama3 + name: "poppy_porpoise-v0.7-l3-8b-iq-imatrix" + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/v6AZmbk-Cb52KskTQTwzW.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llava_1.5_Llama3_mmproj.gguf + parameters: + model: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + sha256: 04badadd6c88cd9c706efef8f5cd337057c805e43dd440a5936f87720c37eb33 + uri: huggingface://Lewdiculous/Poppy_Porpoise-v0.7-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf + - filename: Llava_1.5_Llama3_mmproj.gguf + sha256: d2a9ca943975f6c49c4d55886e873f676a897cff796e92410ace6c20f4efd03b + uri: huggingface://ChaoticNeutrals/Llava_1.5_Llama3_mmproj/mmproj-model-f16.gguf ### START Phi-2 - &phi-2-chat url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" From a248ede222bf5d3761a91fae827be4489d5590d6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 12:02:50 +0200 Subject: [PATCH 0152/2648] models(gallery): add Undi95/Llama-3-LewdPlay-8B-evo-GGUF (#2160) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fdea1d9e..d4e8d6b4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -218,6 +218,23 @@ - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- <<: *llama3 + name: "llama-3-lewdplay-8b-evo" + urls: + - https://huggingface.co/Undi95/Llama-3-LewdPlay-8B-evo-GGUF + description: | + This is a merge of pre-trained language models created using mergekit. + + The new EVOLVE merge method was used (on MMLU specifically), see below for more information! + + Unholy was used for uncensoring, Roleplay Llama 3 for the DPO train he got on top, and LewdPlay for the... lewd side. 
+  overrides:
+    parameters:
+      model: Llama-3-LewdPlay-8B-evo.q8_0.gguf
+  files:
+    - filename: Llama-3-LewdPlay-8B-evo.q8_0.gguf
+      sha256: 1498152d598ff441f73ec6af9d3535875302e7251042d87feb7e71a3618966e8
+      uri: huggingface://Undi95/Llama-3-LewdPlay-8B-evo-GGUF/Llama-3-LewdPlay-8B-evo.q8_0.gguf
 - <<: *llama3
   name: "chaos-rp_l3_b-iq-imatrix"
   urls:

From a8089494fdcaa4cfbc1997f3d32cd225ed395de1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sun, 28 Apr 2024 12:04:04 +0200
Subject: [PATCH 0153/2648] models(gallery): add biomistral-7b (#2161)

* models(gallery): add biomistral-7b

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add <|end_of_text|> to llama3 as stopword

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml           | 14 ++++++++++++++
 gallery/llama3-instruct.yaml |  1 +
 2 files changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index d4e8d6b4..012a1ecb 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -698,6 +698,20 @@
     - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
       sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca"
       uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
+- <<: *hermes-2-pro-mistral
+  name: "biomistral-7b"
+  description: |
+    BioMistral: A Collection of Open-Source Pretrained Large Language Models for Medical Domains
+  urls:
+    - https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF
+  icon: https://huggingface.co/BioMistral/BioMistral-7B/resolve/main/wordart_blue_m_rectangle.png?download=true
+  overrides:
+    parameters:
+      model: BioMistral-7B.Q4_K_M.gguf
+  files:
+    - filename: "BioMistral-7B.Q4_K_M.gguf"
+      sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6"
+      uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf"
 ### END Hermes-2-Pro-Mistral
 ### START Cerbero
 - url: "github:mudler/LocalAI/gallery/cerbero.yaml@master"
diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml
index f6016cbd..3eed758b 100644
--- a/gallery/llama3-instruct.yaml
+++ b/gallery/llama3-instruct.yaml
@@ -41,3 +41,4 @@ config_file: |
   - <|im_end|>
   - <dummy32000>
   - "<|eot_id|>"
+  - <|end_of_text|>

From 3179c019af17a7fdede8089eaa410359ca151d74 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sun, 28 Apr 2024 12:32:49 +0200
Subject: [PATCH 0154/2648] Revert ":arrow_up: Update docs version
 mudler/LocalAI" (#2165)

* Revert ":arrow_up: Update docs version mudler/LocalAI (#2149)"

This reverts commit 56d843c263ce72409baefe2201130737c052239d.

* Apply suggestions from code review

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 docs/data/version.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/data/version.json b/docs/data/version.json
index ad22e2aa..ce7f5d53 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-    "version": " 🖼️ v2.13.0 - Model gallery edition"
+    "version": "v2.13.0"
 }

From 1a0a6f60a7dfd9522ca680048b62180e6b238bf0 Mon Sep 17 00:00:00 2001
From: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>
Date: Sun, 28 Apr 2024 03:34:15 -0700
Subject: [PATCH 0155/2648] docs: update model-gallery.md with correct gallery
 file (#2163)

* Update model-gallery.md with correct gallery file

The readme points to a file that hasn't been updated in months, so when there are announcements about new models, users won't get them if they're still pointing to the old file. Point to the updated files instead.
Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>

* Update model-gallery.md second pass with more understanding

Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>

* Update model-gallery.md

Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>

* Update model-gallery.md

Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>

---------

Signed-off-by: QuinnPiers <167640194+QuinnPiers@users.noreply.github.com>
---
 docs/content/docs/features/model-gallery.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md
index 05d15ef4..137caee1 100644
--- a/docs/content/docs/features/model-gallery.md
+++ b/docs/content/docs/features/model-gallery.md
@@ -42,13 +42,13 @@ To enable the `model-gallery` repository you need to start `local-ai` with the `
 GALLERIES=[{"name":"<GALLERY_NAME>", "url":"<GALLERY_URL>"}]
 ```

+where url is `github:mudler/localai/gallery/index.yaml` and name is localai
+
+will format the values into a valid github url `https://raw.githubusercontent.com/mudler/LocalAI/master/gallery/index.yaml`

 {{% alert note %}}
@@ -60,7 +60,7 @@ As this feature is experimental, you need to run `local-ai` with a list of `GALL
 To enable the two repositories, start `LocalAI` with the `GALLERIES` environment variable:

 ```bash
-GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
+GALLERIES=[{"name":"gallery", "url":"github:mudler/localai/gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
 ```

 If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use.

From 5e243ceaebf523e40968691c67851a214e9397cc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sun, 28 Apr 2024 15:54:15 +0200
Subject: [PATCH 0156/2648] docs: update gallery, add rerankers (#2166)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 README.md                                   |   3 +-
 docs/content/docs/advanced/_index.en.md     |   2 +-
 docs/content/docs/features/model-gallery.md | 160 +++++++-------------
 docs/content/docs/features/reranker.md      |  57 +++++++
 docs/content/docs/features/text-to-audio.md |   4 +
 docs/content/docs/integrations.md           |   2 +-
 docs/content/docs/overview.md               |   3 +-
 docs/content/docs/reference/_index.en.md    |   2 +-
 8 files changed, 123 insertions(+), 110 deletions(-)
 create mode 100644 docs/content/docs/features/reranker.md

diff --git a/README.md b/README.md
index 7fe1a598..27b871ab 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
-- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
+- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
+- 🆕 [Reranker API](https://localai.io/features/reranker/)

 ## 💻 Usage

diff --git a/docs/content/docs/advanced/_index.en.md b/docs/content/docs/advanced/_index.en.md
index fade370b..bee814b4 100644
--- a/docs/content/docs/advanced/_index.en.md
+++ b/docs/content/docs/advanced/_index.en.md
@@ -2,7 +2,7 @@
 weight: 20
 title: "Advanced"
 description: "Advanced usage"
-icon: science
+icon: settings
 lead: ""
 date: 2020-10-06T08:49:15+00:00
 lastmod: 2020-10-06T08:49:15+00:00
diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md
index 137caee1..eca9519b 100644
--- a/docs/content/docs/features/model-gallery.md
+++ b/docs/content/docs/features/model-gallery.md
@@ -7,15 +7,10 @@ weight = 18
 url = '/models'
 +++
+The model gallery is a curated collection of model configurations for [LocalAI](https://github.com/go-skynet/LocalAI) that enables one-click install of models directly from the LocalAI Web interface.

-The model gallery is a (experimental!) collection of models configurations for [LocalAI](https://github.com/go-skynet/LocalAI).
+To ease out model installations, LocalAI provides a way to preload models on start, and to download and install them at runtime. You can install models manually by copying them over the `models` directory, or use the API or the Web interface to configure, download and verify the model assets for you.

-LocalAI to ease out installations of models provide a way to preload models on start and downloading and installing them in runtime. You can install models manually by copying them over the `models` directory, or use the API to configure, download and verify the model assets for you. As the UI is still a work in progress, you will find here the documentation about the API Endpoints.

 {{% alert note %}}
 The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository.
@@ -25,58 +20,55 @@ The models in this gallery are not directly maintained by LocalAI. If you find a
 GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models.
 {{% /alert %}}

+![output](https://github.com/mudler/LocalAI/assets/2420543/7b16676e-d5b1-4c97-89bd-9fa5065c21ad)
+
 ## Useful Links and resources

 - [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) - here you can find a list of the most performing models on the Open LLM benchmark. Keep in mind models compatible with LocalAI must be quantized in the `gguf` format.

+## How it works

-## Model repositories
+Navigate to the "Models" section of the WebUI from the navbar at the top. Here you can find a list of models that can be installed, and you can install them by clicking the "Install" button.
+
+## Add other galleries
+
+You can add other galleries by setting the `GALLERIES` environment variable, a list of JSON objects where each object has a `name` and a `url` field: the `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example:
+
+```json
+GALLERIES=[{"name":"<GALLERY_NAME>", "url":"<GALLERY_URL>"}]
+```
+
-where url is `github:mudler/localai/gallery/index.yaml` and name is localai
-
-will format the values into a valid github url `https://raw.githubusercontent.com/mudler/LocalAI/master/gallery/index.yaml`
+
+where `github:mudler/localai/gallery/index.yaml` will be expanded automatically to `https://raw.githubusercontent.com/mudler/LocalAI/main/index.yaml`.
+
+Note: the URLs are expanded automatically for `github` and `huggingface`, however the `https://` and `http://` prefixes work as well.

 {{% alert note %}}
-As this feature is experimental, you need to run `local-ai` with a list of `GALLERIES`. Currently there are two galleries:
-
-- An official one, containing only definitions and models with a clear LICENSE to avoid any dmca infringment. As I'm not sure what's the best action to do in this case, I'm not going to include any model that is not clearly licensed in this repository which is offically linked to LocalAI.
-- A "community" one that contains an index of `huggingface` models that are compatible with the `ggml` format and lives in the `localai-huggingface-zoo` repository.
-
-To enable the two repositories, start `LocalAI` with the `GALLERIES` environment variable:
-
-```bash
-GALLERIES=[{"name":"gallery", "url":"github:mudler/localai/gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
-```
-
-If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use.
-
-{{% /alert %}}
-
-{{% alert note %}}
-You might not find all the models in this gallery. Automated CI updates the gallery automatically. You can find however most of the models on huggingface (https://huggingface.co/), generally it should be available `~24h` after upload.
-
-By under any circumstances LocalAI and any developer is not responsible for the models in this gallery, as CI is just indexing them and providing a convenient way to install with an automatic configuration with a consistent API. Don't install models from authors you don't trust, and, check the appropriate license for your use case. Models are automatically indexed and hosted on huggingface (https://huggingface.co/). For any issue with the models, please open an issue on the model gallery repository if it's a LocalAI misconfiguration, otherwise refer to the huggingface repository. If you think a model should not be listed, please reach to us and we will remove it from the gallery.
-{{% /alert %}}
-
-{{% alert note %}}
-
-There is no documentation yet on how to build a gallery or a repository - but you can find an example in the [model-gallery](https://github.com/go-skynet/model-gallery) repository.
-
+If you want to build your own gallery, there is no documentation yet. However, you can find the source of the default gallery in the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery).
 {{% /alert %}}
@@ -110,34 +102,16 @@ To install a model from the gallery repository, you can pass the model name in t
 ```bash
 LOCALAI=http://localhost:8080
 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
-    "id": "model-gallery@bert-embeddings"
+    "id": "localai@bert-embeddings"
 }'
 ```

 where:
-- `model-gallery` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories. In the case the same model name is present in both galleries the first match wins.
-- `bert-embeddings` is the model name in the gallery
-  (read its [config here](https://github.com/go-skynet/model-gallery/blob/main/bert-embeddings.yaml)).
+- `localai` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories.
In the case the same model name is present in both galleries the first match wins. - `bert-embeddings` is the model name in the gallery - (read its [config here](https://github.com/go-skynet/model-gallery/blob/main/bert-embeddings.yaml)). + (read its [config here](https://github.com/mudler/LocalAI/tree/master/gallery/blob/main/bert-embeddings.yaml)). -{{% alert note %}} -If the `huggingface` model gallery is enabled (it's enabled by default), -and the model has an entry in the model gallery's associated YAML config -(for `huggingface`, see [`model-gallery/huggingface.yaml`](https://github.com/go-skynet/model-gallery/blob/main/huggingface.yaml)), -you can install models by specifying directly the model's `id`. -For example, to install wizardlm superhot: - -```bash -LOCALAI=http://localhost:8080 -curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "id": "huggingface@TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GGML/wizardlm-13b-v1.0-superhot-8k.ggmlv3.q4_K_M.bin" - }' -``` - -Note that the `id` can be used similarly when pre-loading models at start. -{{% /alert %}} - - -## How to install a model (without a gallery) +### How to install a model not part of a gallery If you don't want to set any gallery repository, you can still install models by loading a model configuration file. @@ -201,13 +175,13 @@ Note: `url` or `id` must be specified. `url` is used to a url to a model gallery For example: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]' ``` or in a YAML file: @@ -218,14 +192,14 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/stablediffusion.yaml +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master ```
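Since the note above says that either `url` or `id` must be specified, preloading by gallery identifier should work the same way as the URL-based examples; a minimal sketch (the `localai@bert-embeddings` id reuses the `/models/apply` example earlier and is only an illustration):

```bash
# Preload a model by gallery id instead of a config URL (sketch).
local-ai --preload-models '[{"id": "localai@bert-embeddings"}]'

# Equivalent environment-variable form:
PRELOAD_MODELS='[{"id": "localai@bert-embeddings"}]' ./local-ai
```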
{{% alert note %}} -You can find already some open licensed models in the [model gallery](https://github.com/go-skynet/model-gallery). +You can find already some open licensed models in the [LocalAI gallery](https://github.com/mudler/LocalAI/tree/master/gallery). If you don't find the model in the gallery you can try to use the "base" model and provide an URL to LocalAI: @@ -233,7 +207,7 @@ If you don't find the model in the gallery you can try to use the "base" model a ``` curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/base.yaml", + "url": "github:mudler/LocalAI/gallery/base.yaml@master", "name": "model-name", "files": [ { @@ -249,7 +223,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ {{% /alert %}} -## Installing a model with a different name +### Override a model name To install a model with a different name, specify a `name` parameter in the request body. @@ -266,11 +240,11 @@ For example, to install a model as `gpt-3.5-turbo`: ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/gpt4all-j.yaml", + "url": "github:mudler/LocalAI/gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo" }' ``` -## Additional Files +### Additional Files
@@ -293,7 +267,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
-## Overriding configuration files +### Overriding configuration files
@@ -324,7 +298,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", + "url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002" }' ``` @@ -348,10 +322,10 @@ URL: https://github.com/EdVince/Stable-Diffusion-NCNN {{< tabs >}} {{% tab name="Prepare the model in runtime" %}} -While the API is running, you can install the model by using the `/models/apply` endpoint and point it to the `stablediffusion` model in the [models-gallery](https://github.com/go-skynet/model-gallery#image-generation-stable-diffusion): +While the API is running, you can install the model by using the `/models/apply` endpoint and point it to the `stablediffusion` model in the [models-gallery](https://github.com/mudler/LocalAI/tree/master/gallery#image-generation-stable-diffusion): ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/stablediffusion.yaml" + "url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master" }' ``` @@ -361,13 +335,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ You can set the `PRELOAD_MODELS` environment variable: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]' ``` or in a YAML file: @@ -378,7 +352,7 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/stablediffusion.yaml +- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master ``` {{% /tab %}} @@ -403,7 +377,7 @@ URL: https://github.com/ggerganov/whisper.cpp ```bash curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/whisper-base.yaml", + "url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1" }' ``` @@ -414,13 +388,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ You can set the `PRELOAD_MODELS` environment variable: ```bash -PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}] +PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}] ``` or as arg: ```bash -local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}]' +local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}]' ``` or in a YAML file: @@ -431,37 +405,13 @@ local-ai --preload-models-config "/path/to/yaml" YAML: ```yaml -- url: github:go-skynet/model-gallery/whisper-base.yaml +- url: github:mudler/LocalAI/gallery/whisper-base.yaml@master name: whisper-1 ``` {{% /tab %}} {{< /tabs >}} -### GPTs - -
- -```bash -LOCALAI=http://localhost:8080 -curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "url": "github:go-skynet/model-gallery/gpt4all-j.yaml", - "name": "gpt4all-j" - }' -``` - -To test it: - -``` -curl $LOCALAI/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "gpt4all-j", - "messages": [{"role": "user", "content": "How are you?"}], - "temperature": 0.1 - }' -``` - -
- ### Note LocalAI will create a batch process that downloads the required files from a model definition and automatically reload itself to include the new model. @@ -495,7 +445,7 @@ Returns an `uuid` and an `url` to follow up the state of the process: { "uuid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"} ``` -To see a collection example of curated models definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery). +To see a collection example of curated models definition files, see the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery). #### Get model job state `/models/jobs/` diff --git a/docs/content/docs/features/reranker.md b/docs/content/docs/features/reranker.md new file mode 100644 index 00000000..92c406df --- /dev/null +++ b/docs/content/docs/features/reranker.md @@ -0,0 +1,57 @@ + ++++ +disableToc = false +title = " Reranker" +weight = 11 +url = "/features/reranker/" ++++ + +A **reranking** model, often referred to as a cross-encoder, is a core component in the two-stage retrieval systems used in information retrieval and natural language processing tasks. +Given a query and a set of documents, it will output similarity scores. + +We can use then the score to reorder the documents by relevance in our RAG system to increase its overall accuracy and filter out non-relevant results. + +![output](https://github.com/mudler/LocalAI/assets/2420543/ede67b25-fac4-4833-ae4f-78290e401e60) + +LocalAI supports reranker models, and you can use them by using the `rerankers` backend, which uses [rerankers](https://github.com/AnswerDotAI/rerankers). + +## Usage + +You can test `rerankers` by using container images with python (this does **NOT** work with `core` images) and a model config file like this, or by installing `cross-encoder` from the gallery in the UI: + +```yaml +name: jina-reranker-v1-base-en +backend: rerankers +parameters: + model: cross-encoder + +# optionally: +# type: flashrank +# diffusers: +# pipeline_type: en # to specify the english language +``` + +and test it with: + +```bash + + curl http://localhost:8080/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jina-reranker-v1-base-en", + "query": "Organic skincare products for sensitive skin", + "documents": [ + "Eco-friendly kitchenware for modern homes", + "Biodegradable cleaning supplies for eco-conscious consumers", + "Organic cotton baby clothes for sensitive skin", + "Natural organic skincare range for sensitive skin", + "Tech gadgets for smart homes: 2024 edition", + "Sustainable gardening tools and compost solutions", + "Sensitive skin-friendly facial cleansers and toners", + "Organic food wraps and storage solutions", + "All-natural pet food for dogs with allergies", + "Yoga mats made from recycled materials" + ], + "top_n": 3 + }' +``` diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index f53407a1..ebfdda1d 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -163,3 +163,7 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ "input":"Hello!" }' | aplay ``` + +## Parler-tts + +`parler-tts`. It is possible to install and configure the model directly from the gallery. 
https://github.com/huggingface/parler-tts \ No newline at end of file diff --git a/docs/content/docs/integrations.md b/docs/content/docs/integrations.md index a7666e77..50f683c3 100644 --- a/docs/content/docs/integrations.md +++ b/docs/content/docs/integrations.md @@ -2,7 +2,7 @@ disableToc = false title = "Integrations" weight = 19 -icon = "rocket_launch" +icon = "sync" +++ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index f0f59494..15086f6f 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -99,8 +99,9 @@ Note that this started just as a fun weekend project by [mudler](https://github. - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) -- 🆕 [Vision API](https://localai.io/features/gpt-vision/) +- 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 💾 [Stores](https://localai.io/stores) +- 🆕 [Reranker](https://localai.io/features/reranker/) ## Contribute and help diff --git a/docs/content/docs/reference/_index.en.md b/docs/content/docs/reference/_index.en.md index 339d2728..d8a8f2a7 100644 --- a/docs/content/docs/reference/_index.en.md +++ b/docs/content/docs/reference/_index.en.md @@ -2,7 +2,7 @@ weight: 23 title: "References" description: "Reference" -icon: science +icon: menu_book lead: "" date: 2020-10-06T08:49:15+00:00 lastmod: 2020-10-06T08:49:15+00:00 From a78cd677375a936d4f7ebc8212aeac847403abe1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 19:30:23 +0200 Subject: [PATCH 0157/2648] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index ab45e5aa..9fe57cef 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -30,7 +30,7 @@ Before you begin, ensure you have a container engine installed if you are not us > _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_. -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/reference/container-images" %}}). These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. 
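The `parler-tts` note added to `text-to-audio.md` above is terse; once a parler-tts voice is installed from the gallery, calling it should look like the other `/tts` examples in that file. A minimal sketch (the model name `parler-tts-mini-v0.1` is an assumption, not taken from this patch):

```bash
# Assumes a parler-tts model was installed from the gallery as
# "parler-tts-mini-v0.1" (hypothetical name).
curl http://localhost:8080/tts \
  -H "Content-Type: application/json" \
  -d '{
    "model": "parler-tts-mini-v0.1",
    "input": "Hello, this is a test!"
  }' --output out.wav
```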
From 26e18925215ed5bd8caf1cda2270053e10c9d9ec Mon Sep 17 00:00:00 2001
From: Sijia Lu <46901221+LeonSijiaLu@users.noreply.github.com>
Date: Sun, 28 Apr 2024 13:38:02 -0400
Subject: [PATCH 0158/2648] Issue-1720: Updated `Build on mac` documentations
 (#2171)

updated build on macs documentation

Signed-off-by: LeonSijiaLu
---
 docs/content/docs/getting-started/build.md | 24 +++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index a4db135e..7e585ab3 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -154,11 +154,11 @@ cd LocalAI
 # build the binary
 make build

-# Download gpt4all-j to models/
-wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+# Download phi-2 to models/
+wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K

 # Use a template from the examples
-cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
+cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl

 # Run LocalAI
 ./local-ai --models-path=./models/ --debug=true
@@ -167,7 +167,7 @@
 curl http://localhost:8080/v1/models

 curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "ggml-gpt4all-j",
+    "model": "phi-2.Q2_K",
     "messages": [{"role": "user", "content": "How are you?"}],
     "temperature": 0.9
   }'
@@ -175,9 +175,19 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 #### Troubleshooting mac

-If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.
-If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
-If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
+1. If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.
+2. After installing Xcode, if you receive the xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`, you might have installed the Xcode command line tools before Xcode itself; the standalone command line tools point to an incomplete SDK.
+
+```
+# print /Library/Developer/CommandLineTools, if command line tools were installed in advance
+xcode-select --print-path
+
+# point to a complete SDK
+sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
+```
+
+3. If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
+4. If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
``` # reinstall build dependencies From 21974fe1d34a760bd94b53a9b10a6c784452df3c Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:51:53 -0500 Subject: [PATCH 0159/2648] fix: swap to WHISPER_CUDA per deprecation message from whisper.cpp (#2170) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6ef6e9ab..be1c6d2e 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ endif ifeq ($(BUILD_TYPE),cublas) CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) export LLAMA_CUBLAS=1 - export WHISPER_CUBLAS=1 + export WHISPER_CUDA=1 CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda endif From 987b7ad42d3102f535e8ed8ebd84fc303b66c519 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:24:16 -0500 Subject: [PATCH 0160/2648] feat: only keep the build artifacts from the grpc build (#2172) * feat: only keep the build artifacts from the grpc build Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: remove separate Cache GRPC build step Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * feat: remove docker inspect step, it is leftover from previous debugging Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/image_build.yml | 28 ---------------------------- Dockerfile | 12 ++++++------ 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index fb1985fd..7d60d23a 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -201,26 +201,6 @@ jobs: username: ${{ secrets.quayUsername }} password: ${{ secrets.quayPassword }} - - name: Cache GRPC - uses: docker/build-push-action@v5 - with: - builder: ${{ steps.buildx.outputs.name }} - # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. - # This means that even the MAKEFLAGS have to be an EXACT match. - # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. - build-args: | - GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} - GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 - context: . 
- file: ./Dockerfile - cache-from: type=gha - target: grpc - platforms: ${{ inputs.platforms }} - push: false - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - name: Build and push uses: docker/build-push-action@v5 with: @@ -248,14 +228,6 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - name: Inspect image - if: github.event_name != 'pull_request' - run: | - docker pull localai/localai:${{ steps.meta.outputs.version }} - docker image inspect localai/localai:${{ steps.meta.outputs.version }} - docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} - docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} - - name: Build and push AIO image if: inputs.aio != '' uses: docker/build-push-action@v5 diff --git a/Dockerfile b/Dockerfile index 717b3a3a..1c4e24fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -148,16 +148,16 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc - -WORKDIR /build/grpc/cmake/build - # We install GRPC to a different prefix here so that we can copy in only the build artifacts later # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree # and running make install in the target container -RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ +RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ + mkdir -p /build/grpc/cmake/build && \ + cd /build/grpc/cmake/build && \ + cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. 
&& \ make && \ - make install + make install && \ + rm -rf /build ################################### ################################### From 01860674c4d95a1bacb2a22fb8c30d086ee25ba8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:41:12 +0200 Subject: [PATCH 0161/2648] :arrow_up: Update ggerganov/llama.cpp (#2176) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index be1c6d2e..5980fc3c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4dba7e8114d84241c842b986e008af8b88d1a019 +CPPLLAMA_VERSION?=7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a24cd4fda0bdd3f7d15da05e8da9131930adc9b7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:41:59 +0200 Subject: [PATCH 0162/2648] docs: enhance and condense few sections (#2178) Signed-off-by: Ettore Di Giacinto --- .../container-images.md | 66 +++++++++++++++++-- .../docs/getting-started/kubernetes.md | 30 +++++++++ docs/content/docs/getting-started/manual.md | 17 +---- .../docs/getting-started/quickstart.md | 37 +++++++++-- docs/content/docs/reference/aio-images.md | 53 --------------- 5 files changed, 124 insertions(+), 79 deletions(-) rename docs/content/docs/{reference => getting-started}/container-images.md (65%) create mode 100644 docs/content/docs/getting-started/kubernetes.md delete mode 100644 docs/content/docs/reference/aio-images.md diff --git a/docs/content/docs/reference/container-images.md b/docs/content/docs/getting-started/container-images.md similarity index 65% rename from docs/content/docs/reference/container-images.md rename to docs/content/docs/getting-started/container-images.md index 6531dd97..aaeb43ec 100644 --- a/docs/content/docs/reference/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -1,13 +1,14 @@ - +++ disableToc = false -title = "Available Container images" -weight = 25 +title = "Run with container images" +weight = 6 +url = '/basics/container/' +ico = "rocket_launch" +++ LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai). -> _For All-in-One image with a pre-configured set of models and backends, see the [AIO Images]({{%relref "docs/reference/aio-images" %}})._ +All-in-One images comes with a pre-configured set of models and backends, standard images instead do not have any model pre-configured and installed. For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}). @@ -22,6 +23,62 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA {{% /alert %}} +## All-in-one images + +All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. 
The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
+
+In the AIO images the models are configured with the names of OpenAI models; however, they are really backed by open-source models. You can find the mapping in the table below:
+
+| Category | Model name | Real model (CPU) | Real model (GPU) |
+| ---- | ---- | ---- | ---- |
+| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
+| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
+| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
+| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
+| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
+| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
+
+### Usage
+
+Select the image (CPU or GPU) and start the container with Docker:
+
+```bash
+# CPU example
+docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
+```
+
+LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
+
+### Available images
+
+| Description | Quay | Docker Hub |
+| --- | --- |-----------------------------------------------|
+| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
+| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
+| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
+| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
+| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
+| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
+| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |
+
+### Available environment variables
+
+The AIO images inherit the same environment variables as the base images and the environment of LocalAI (which you can inspect by calling `--help`). In addition, they support the following environment variables, available only from the container image:
+
+| Variable | Default | Description |
+| ---------------------| ------- | ----------- |
+| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
+| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
+
+
+## Standard container images
+
+Standard container images do not have pre-installed models.
+
+Images are available with and without python dependencies. Note that images with python dependencies are bigger (in the order of 17GB).
+
+Images with `core` in the tag are smaller and do not contain any python dependencies.
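Tying together the AIO environment variables from the table above, a container start that forces the CPU profile and injects an extra model configuration might look like this sketch (the `MODELS` URL is a placeholder, not a real address):

```bash
# Force the "cpu" profile instead of auto-detection, and preload one extra
# model from a YAML configuration URL (placeholder address).
docker run -p 8080:8080 --name local-ai -ti \
  -e PROFILE=cpu \
  -e MODELS="https://example.com/configurations/my-model.yaml" \
  localai/localai:latest-aio-cpu
```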
+ {{< tabs tabTotal="6" >}} {{% tab tabName="Vanilla / CPU Images" %}} @@ -100,4 +157,3 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA ## See Also - [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}) -- [AIO Images]({{%relref "docs/reference/aio-images" %}}) \ No newline at end of file diff --git a/docs/content/docs/getting-started/kubernetes.md b/docs/content/docs/getting-started/kubernetes.md new file mode 100644 index 00000000..17971b37 --- /dev/null +++ b/docs/content/docs/getting-started/kubernetes.md @@ -0,0 +1,30 @@ ++++ +disableToc = false +title = "Run with Kubernetes" +weight = 6 +url = '/basics/kubernetes/' +ico = "rocket_launch" ++++ + +For installing LocalAI in Kubernetes, you can use the `go-skynet` helm chart: + +```bash +# Install the helm repository +helm repo add go-skynet https://go-skynet.github.io/helm-charts/ +# Update the repositories +helm repo update +# Get the values +helm show values go-skynet/local-ai > values.yaml + +# Edit the values value if needed +# vim values.yaml ... + +# Install the helm chart +helm install local-ai go-skynet/local-ai -f values.yaml +``` + +If you prefer to install from manifest file, you can install from the deployment file, and customize as you like: + +``` +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml +``` \ No newline at end of file diff --git a/docs/content/docs/getting-started/manual.md b/docs/content/docs/getting-started/manual.md index c2da82f7..befc0244 100644 --- a/docs/content/docs/getting-started/manual.md +++ b/docs/content/docs/getting-started/manual.md @@ -131,22 +131,7 @@ Note: If you are on Windows, please make sure the project is on the Linux Filesy {{% tab tabName="Kubernetes" %}} -For installing LocalAI in Kubernetes, you can use the following helm chart: - -```bash -# Install the helm repository -helm repo add go-skynet https://go-skynet.github.io/helm-charts/ -# Update the repositories -helm repo update -# Get the values -helm show values go-skynet/local-ai > values.yaml - -# Edit the values value if needed -# vim values.yaml ... - -# Install the helm chart -helm install local-ai go-skynet/local-ai -f values.yaml -``` +See the [Kubernetes section]({{%relref "docs/getting-started/kubernetes" %}}). {{% /tab %}} {{% tab tabName="From binary" %}} diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 9fe57cef..d4d9d7da 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -30,7 +30,7 @@ Before you begin, ensure you have a container engine installed if you are not us > _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_. -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/reference/container-images" %}}). +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/getting-started/container-images" %}}). These images are available for both CPU and GPU environments. 
The AIO images are designed to be easy to use and require no configuration.
@@ -91,7 +91,7 @@ services:
 #       capabilities: [gpu]
 ```

-For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}).
+For a list of all the container-images available, see [Container images]({{%relref "docs/getting-started/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/getting-started/container-images" %}}).

 {{% alert icon="💡" %}}
@@ -114,9 +114,36 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca
 {{% /alert %}}

+## From binary
+
+LocalAI is available as a standalone binary as well. Binaries are compiled for Linux and MacOS and automatically uploaded to the GitHub releases. Windows is known to work with WSL.
+
+You can check out the releases at https://github.com/mudler/LocalAI/releases.
+
+{{< tabs tabTotal="2" >}}
+{{% tab tabName="Linux" %}}
+| CPU flagset | Link |
+| --- | --- |
+| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Linux-x86_64) |
+| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Linux-x86_64) |
+| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Linux-x86_64) |
+{{% /tab %}}
+{{% tab tabName="MacOS" %}}
+| CPU flagset | Link |
+| --- | --- |
+| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Darwin-arm64) |
+| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Darwin-arm64) |
+| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Darwin-arm64) |
+
+{{% /tab %}}
+
+{{< /tabs >}}
+
 ## Try it out

-LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}}) ). However, you can test out the API endpoints using `curl`, you can find few examples below.
+Connect to LocalAI: by default, the WebUI is accessible at http://localhost:8080. You can also use 3rd party projects to interact with LocalAI as you would use OpenAI (see also [Integrations]({{%relref "docs/integrations" %}})).
+
+You can also test out the API endpoints using `curl`; see the examples below.
### Text Generation @@ -300,6 +327,6 @@ Explore further resources and community contributions: - [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}}) - [Run models manually]({{%relref "docs/getting-started/manual" %}}) - [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) -- [Container images]({{%relref "docs/reference/container-images" %}}) -- [All-in-one Images]({{%relref "docs/reference/aio-images" %}}) +- [Container images]({{%relref "docs/getting-started/container-images" %}}) +- [All-in-one Images]({{%relref "docs/getting-started/container-images" %}}) - [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md deleted file mode 100644 index b5253ee4..00000000 --- a/docs/content/docs/reference/aio-images.md +++ /dev/null @@ -1,53 +0,0 @@ - -+++ -disableToc = false -title = "All-In-One images" -weight = 26 -+++ - -All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. - -In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. You can find the table below - -| Category | Model name | Real model (CPU) | Real model (GPU) | -| ---- | ---- | ---- | ---- | -| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | -| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | -| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | -| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | -| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | -| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | - -## Usage - -Select the image (CPU or GPU) and start the container with Docker: - -```bash -# CPU example -docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu -``` - -LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models). - -## Available images - -| Description | Quay | Docker Hub | -| --- | --- |-----------------------------------------------| -| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | -| Versioned image (e.g. 
for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | -| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | -| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | -| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | -| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` | -| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` | - -## Available environment variables - -The AIO Images are inheriting the same environment variables as the base images and the environment of LocalAI (that you can inspect by calling `--help`). However, it supports additional environment variables available only from the container image - -| Variable | Default | Description | -| ---------------------| ------- | ----------- | -| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) | - - From e8d44447ad49679cb877a4aa025e8d6e030e9d5b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:42:46 +0200 Subject: [PATCH 0163/2648] feat(gallery): support model deletion (#2173) * feat(gallery): op now supports deletion of models Signed-off-by: Ettore Di Giacinto * Wire things with WebUI(WIP) Signed-off-by: Ettore Di Giacinto * minor improvements Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/config/backend_config.go | 68 +++++++++++++++++++++----- core/http/elements/gallery.go | 43 ++++++++++++---- core/http/endpoints/localai/gallery.go | 21 ++++++++ core/http/routes/localai.go | 2 + core/http/routes/ui.go | 44 ++++++++++++++++- core/services/gallery.go | 57 ++++++++++++++++----- pkg/gallery/gallery.go | 46 +++++++++++++++++ pkg/gallery/gallery_suite_test.go | 7 +++ pkg/gallery/models.go | 17 ++++++- pkg/gallery/models_test.go | 15 ++++++ pkg/gallery/op.go | 2 + pkg/model/loader.go | 8 ++- 12 files changed, 294 insertions(+), 36 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 35e0776d..0d7d0cbf 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -184,6 +184,36 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool { return len(c.functionCallNameString) > 0 } +// MMProjFileName returns the filename of the MMProj file +// If the MMProj is a URL, it will return the MD5 of the URL which is the filename +func (c *BackendConfig) MMProjFileName() string { + modelURL := downloader.ConvertURL(c.MMProj) + if downloader.LooksLikeURL(modelURL) { + return utils.MD5(modelURL) + } + + return c.MMProj +} + +func (c *BackendConfig) IsMMProjURL() bool { + return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj)) +} + +func (c *BackendConfig) IsModelURL() bool { + return downloader.LooksLikeURL(downloader.ConvertURL(c.Model)) +} + +// ModelFileName returns the filename of the model +// If the model is a URL, it will return the MD5 of the URL which is the filename +func (c *BackendConfig) ModelFileName() 
string { + modelURL := downloader.ConvertURL(c.Model) + if downloader.LooksLikeURL(modelURL) { + return utils.MD5(modelURL) + } + + return c.Model +} + func (c *BackendConfig) FunctionToCall() string { if c.functionCallNameString != "" && c.functionCallNameString != "none" && c.functionCallNameString != "auto" { @@ -532,16 +562,13 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { } } - modelURL := config.PredictionOptions.Model - modelURL = downloader.ConvertURL(modelURL) - - if downloader.LooksLikeURL(modelURL) { - // md5 of model name - md5Name := utils.MD5(modelURL) - + // If the model is an URL, expand it, and download the file + if config.IsModelURL() { + modelFileName := config.ModelFileName() + modelURL := downloader.ConvertURL(config.Model) // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status) + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } @@ -549,9 +576,27 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error { cc := cl.configs[i] c := &cc - c.PredictionOptions.Model = md5Name + c.PredictionOptions.Model = modelFileName cl.configs[i] = *c } + + if config.IsMMProjURL() { + modelFileName := config.MMProjFileName() + modelURL := downloader.ConvertURL(config.MMProj) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.MMProj = modelFileName + cl.configs[i] = *c + } + if cl.configs[i].Name != "" { glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) } @@ -586,7 +631,8 @@ func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...C } for _, file := range files { // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") { + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || + strings.HasPrefix(file.Name(), ".") { continue } c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) 
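A side note on the `ModelFileName`/`MMProjFileName` helpers in the diff above: models referenced by URL are stored under the MD5 of the URL string, so the on-disk name can be predicted from a shell. A sketch, assuming `utils.MD5` is a plain hex MD5 digest of the URL (an assumption; its implementation is not shown in this diff):

```bash
# Hypothetical model URL; Preload() would save the download under this
# MD5 hex digest inside the models path.
echo -n 'https://example.com/models/some-model.gguf' | md5sum | awk '{print $1}'
```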
diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 6edbd23d..8093b042 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -13,7 +13,7 @@ const ( NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) -func DoneProgress(uid string) string { +func DoneProgress(uid, text string) string { return elem.Div( attrs.Props{}, elem.H3( @@ -23,7 +23,7 @@ func DoneProgress(uid string) string { "tabindex": "-1", "autofocus": "", }, - elem.Text("Installation completed"), + elem.Text(text), ), ).Render() } @@ -60,7 +60,7 @@ func ProgressBar(progress string) string { ).Render() } -func StartProgressBar(uid, progress string) string { +func StartProgressBar(uid, progress, text string) string { if progress == "" { progress = "0" } @@ -77,7 +77,7 @@ func StartProgressBar(uid, progress string) string { "tabindex": "-1", "autofocus": "", }, - elem.Text("Installing"), + elem.Text(text), // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. elem.Div(attrs.Props{ "hx-get": "/browse/job/progress/" + uid, @@ -106,14 +106,33 @@ func cardSpan(text, icon string) elem.Node { func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string { //StartProgressBar(uid, "0") modelsElements := []elem.Node{} - span := func(s string) elem.Node { - return elem.Span( + // span := func(s string) elem.Node { + // return elem.Span( + // attrs.Props{ + // "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + // }, + // elem.Text(s), + // ) + // } + deleteButton := func(m *gallery.GalleryModel) elem.Node { + return elem.Button( attrs.Props{ - "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "/browse/delete/model/" + m.Name, }, - elem.Text(s), + elem.I( + attrs.Props{ + "class": "fa-solid fa-cancel pr-2", + }, + ), + elem.Text("Delete"), ) } + installButton := func(m *gallery.GalleryModel) elem.Node { return elem.Button( attrs.Props{ @@ -202,10 +221,14 @@ func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[stri elem.If( currentlyInstalling, elem.Node( // If currently installing, show progress bar - elem.Raw(StartProgressBar(installing.Get(galleryID), "0")), + elem.Raw(StartProgressBar(installing.Get(galleryID), "0", "Installing")), ), // Otherwise, show install button (if not installed) or display "Installed" elem.If(m.Installed, - span("Installed"), + //elem.Node(elem.Div( + // attrs.Props{}, + // span("Installed"), deleteButton(m), + // )), + deleteButton(m), installButton(m), ), ), diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index b693e7c3..a74a2bb9 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -74,6 +74,27 @@ func (mgs *ModelGalleryEndpointService) 
ApplyModelGalleryEndpoint() func(c *fibe } } +func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + modelName := c.Params("name") + + mgs.galleryApplier.C <- gallery.GalleryOp{ + Delete: true, + GalleryName: modelName, + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + + return c.JSON(struct { + ID string `json:"uuid"` + StatusURL string `json:"status"` + }{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + } +} + func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 6415c894..138babbe 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -23,6 +23,8 @@ func RegisterLocalAIRoutes(app *fiber.App, modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint()) + app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index b63b1870..2b8c6b95 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -66,6 +66,12 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.ListModels(filteredModels, installingModels)) }) + /* + + Install routes + + */ + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service // https://htmx.org/examples/progress-bar/ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { @@ -89,7 +95,33 @@ func RegisterUIRoutes(app *fiber.App, galleryService.C <- op }() - return c.SendString(elements.StartProgressBar(uid, "0")) + return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) + }) + + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ + id, err := uuid.NewUUID() + if err != nil { + return err + } + + uid := id.String() + + installingModels.Set(galleryID, uid) + + op := gallery.GalleryOp{ + Id: uid, + Delete: true, + GalleryName: galleryID, + } + go func() { + galleryService.C <- op + }() + + return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) }) // Display the job current progress status @@ -118,12 +150,20 @@ func RegisterUIRoutes(app *fiber.App, // this route is hit when the job is done, and we display the // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + + status := galleryService.GetStatus(c.Params("uid")) + for _, k := range installingModels.Keys() { if installingModels.Get(k) == c.Params("uid") { installingModels.Delete(k) } } - return c.SendString(elements.DoneProgress(c.Params("uid"))) + displayText := "Installation completed" + if status.Deletion { + displayText = "Deletion completed" + } + + return c.SendString(elements.DoneProgress(c.Params("uid"), displayText)) }) } diff --git a/core/services/gallery.go b/core/services/gallery.go index b068abbb..6a54e38c 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "os" + "path/filepath" "strings" "sync" @@ -84,18 +85,47 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader } var err error - // if the request contains a gallery name, we apply the gallery from the gallery list - if op.GalleryName != "" { - if strings.Contains(op.GalleryName, "@") { - err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) - } else { - err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + + // delete a model + if op.Delete { + modelConfig := &config.BackendConfig{} + // Galleryname is the name of the model in this case + dat, err := os.ReadFile(filepath.Join(g.modelPath, op.GalleryName+".yaml")) + if err != nil { + updateError(err) + continue } - } else if op.ConfigURL != "" { - startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) - err = cl.Preload(g.modelPath) + err = yaml.Unmarshal(dat, modelConfig) + if err != nil { + updateError(err) + continue + } + + files := []string{} + // Remove the model from the config + if modelConfig.Model != "" { + files = append(files, modelConfig.ModelFileName()) + } + + if modelConfig.MMProj != "" { + files = append(files, modelConfig.MMProjFileName()) + } + + err = gallery.DeleteModelFromSystem(g.modelPath, op.GalleryName, files) } else { - err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + // if the request contains a gallery name, we apply the gallery from the gallery list + if op.GalleryName != "" { + if strings.Contains(op.GalleryName, "@") { + err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } else { + err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback) + } + } else if op.ConfigURL != "" { + startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL) + err = cl.Preload(g.modelPath) + } else { + err = prepareModel(g.modelPath, op.Req, cl, progressCallback) + } } if err != nil { @@ -116,7 +146,12 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader continue } - g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Processed: true, Message: "completed", 
Progress: 100}) + g.UpdateStatus(op.Id, + &gallery.GalleryOpStatus{ + Deletion: op.Delete, + Processed: true, + Message: "completed", + Progress: 100}) } } }() diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go index c4575817..d90ce4d9 100644 --- a/pkg/gallery/gallery.go +++ b/pkg/gallery/gallery.go @@ -1,6 +1,7 @@ package gallery import ( + "errors" "fmt" "os" "path/filepath" @@ -184,3 +185,48 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) } return models, nil } + +func DeleteModelFromSystem(basePath string, name string, additionalFiles []string) error { + // os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths. + name = strings.ReplaceAll(name, string(os.PathSeparator), "__") + + configFile := filepath.Join(basePath, fmt.Sprintf("%s.yaml", name)) + + galleryFile := filepath.Join(basePath, galleryFileName(name)) + + var err error + // Delete all the files associated to the model + // read the model config + galleryconfig, err := ReadConfigFile(galleryFile) + if err != nil { + log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) + } + + // Remove additional files + if galleryconfig != nil { + for _, f := range galleryconfig.Files { + fullPath := filepath.Join(basePath, f.Filename) + log.Debug().Msgf("Removing file %s", fullPath) + if e := os.Remove(fullPath); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e)) + } + } + } + + for _, f := range additionalFiles { + fullPath := filepath.Join(filepath.Join(basePath, f)) + log.Debug().Msgf("Removing additional file %s", fullPath) + if e := os.Remove(fullPath); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) + } + } + + log.Debug().Msgf("Removing model config file %s", configFile) + + // Delete the model config file + if e := os.Remove(configFile); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e)) + } + + return err +} diff --git a/pkg/gallery/gallery_suite_test.go b/pkg/gallery/gallery_suite_test.go index 44256bc2..bf13cac9 100644 --- a/pkg/gallery/gallery_suite_test.go +++ b/pkg/gallery/gallery_suite_test.go @@ -1,6 +1,7 @@ package gallery_test import ( + "os" "testing" . 
"github.com/onsi/ginkgo/v2" @@ -11,3 +12,9 @@ func TestGallery(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Gallery test suite") } + +var _ = BeforeSuite(func() { + if os.Getenv("FIXTURES") == "" { + Fail("FIXTURES env var not set") + } +}) diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index 2ab4c832..1fc6c0a2 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -178,5 +178,20 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides log.Debug().Msgf("Written config file %s", configFilePath) } - return nil + // Save the model gallery file for further reference + modelFile := filepath.Join(basePath, galleryFileName(name)) + data, err := yaml.Marshal(config) + if err != nil { + return err + } + + log.Debug().Msgf("Written gallery file %s", modelFile) + + return os.WriteFile(modelFile, data, 0600) + + //return nil +} + +func galleryFileName(name string) string { + return "._gallery_" + name + ".yaml" } diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go index 6eb63128..bfc2b9a6 100644 --- a/pkg/gallery/models_test.go +++ b/pkg/gallery/models_test.go @@ -1,6 +1,7 @@ package gallery_test import ( + "errors" "os" "path/filepath" @@ -11,6 +12,7 @@ import ( ) var _ = Describe("Model test", func() { + Context("Downloading", func() { It("applies model correctly", func() { tempdir, err := os.MkdirTemp("", "test") @@ -80,6 +82,19 @@ var _ = Describe("Model test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models)).To(Equal(1)) Expect(models[0].Installed).To(BeTrue()) + + // delete + err = DeleteModelFromSystem(tempdir, "bert", []string{}) + Expect(err).ToNot(HaveOccurred()) + + models, err = AvailableGalleryModels(galleries, tempdir) + Expect(err).ToNot(HaveOccurred()) + Expect(len(models)).To(Equal(1)) + Expect(models[0].Installed).To(BeFalse()) + + _, err = os.Stat(filepath.Join(tempdir, "bert.yaml")) + Expect(err).To(HaveOccurred()) + Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue()) }) It("renames model correctly", func() { diff --git a/pkg/gallery/op.go b/pkg/gallery/op.go index 73d748bf..4637820a 100644 --- a/pkg/gallery/op.go +++ b/pkg/gallery/op.go @@ -4,12 +4,14 @@ type GalleryOp struct { Id string GalleryName string ConfigURL string + Delete bool Req GalleryModel Galleries []Gallery } type GalleryOpStatus struct { + Deletion bool `json:"deletion"` // Deletion is true if the operation is a deletion FileName string `json:"file_name"` Error error `json:"error"` Processed bool `json:"processed"` diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 1b5c9aa0..2d6b3acb 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -96,7 +96,13 @@ func (ml *ModelLoader) ListModels() ([]string, error) { models := []string{} for _, file := range files { // Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method? 
- if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") || strings.HasSuffix(file.Name(), ".json") || strings.HasSuffix(file.Name(), ".DS_Store") { + if strings.HasSuffix(file.Name(), ".tmpl") || + strings.HasSuffix(file.Name(), ".keep") || + strings.HasSuffix(file.Name(), ".yaml") || + strings.HasSuffix(file.Name(), ".yml") || + strings.HasSuffix(file.Name(), ".json") || + strings.HasSuffix(file.Name(), ".DS_Store") || + strings.HasPrefix(file.Name(), ".") { continue } From 067489364934919e99647c5c966864d8c43468f1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Apr 2024 23:56:10 +0200 Subject: [PATCH 0164/2648] Update .env Signed-off-by: Ettore Di Giacinto --- .env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 35d4f2d7..ea2d4e35 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ # ## Define galleries. ## models will to install will be visible in `/models/available` -# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}] +# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}] ## CORS settings # LOCALAI_CORS=true @@ -86,4 +86,4 @@ # LOCALAI_WATCHDOG_BUSY=true # # Time in duration format (e.g. 1h30m) after which a backend is considered busy -# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \ No newline at end of file +# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m From 5fef3b0ff15903d6f4f81bcdfb64632934d8c5cc Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 29 Apr 2024 00:32:45 +0200 Subject: [PATCH 0165/2648] :arrow_up: Update ggerganov/whisper.cpp (#2177) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5980fc3c..60cd3f24 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418 +WHISPER_CPP_VERSION?=22b6598cc9f1454567efa0d816fdc57637243999 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 74d903acca4729e6346405ba5098d19c7264960c Mon Sep 17 00:00:00 2001 From: Sijia Lu <46901221+LeonSijiaLu@users.noreply.github.com> Date: Sun, 28 Apr 2024 22:21:51 -0400 Subject: [PATCH 0166/2648] [Documentations] Removed invalid numberings from `troubleshooting mac` (#2174) * updated troubleshooting mac Signed-off-by: LeonSijiaLu * prepend - Signed-off-by: LeonSijiaLu --------- Signed-off-by: LeonSijiaLu --- docs/content/docs/getting-started/build.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 7e585ab3..2b69ef4e 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -173,10 +173,11 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso }' ``` -#### Troublshooting mac +#### Troubleshooting mac -1. If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. -2. 
After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK. +- If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. + +- After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK. ``` # print /Library/Developer/CommandLineTools, if command line tools were installed in advance @@ -186,8 +187,9 @@ xcode-select --print-path sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer ``` -3. If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). -4. If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. +- If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). + +- If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. ``` # reinstall build dependencies From 982dc6a2bd2f509a64bee96a739acad670e56503 Mon Sep 17 00:00:00 2001 From: Dave Date: Sun, 28 Apr 2024 23:55:29 -0400 Subject: [PATCH 0167/2648] fix: github bump_docs.sh regex to drop emoji and other text (#2180) fix: bump_docs regex Signed-off-by: Dave Lee --- .github/bump_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/bump_docs.sh b/.github/bump_docs.sh index 169022aa..e69d3824 100755 --- a/.github/bump_docs.sh +++ b/.github/bump_docs.sh @@ -2,6 +2,6 @@ set -xe REPO=$1 -LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name') +LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name') cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json From b7ea9602f5d74c73c56961b853bc99c23a99d1fb Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 29 Apr 2024 15:11:09 +0200 Subject: [PATCH 0168/2648] fix: undefined symbol: iJIT_NotifyEvent in import torch ##2153 (#2179) * add extra index to Intel repository * Update install.sh --- backend/python/common-env/transformers/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 30ec0de0..ef768bc7 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -26,7 +26,7 @@ if [ -d "/opt/intel" ]; then # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] + pip install torch==2.1.0.post0 torchvision==0.16.0.post0 torchaudio==2.1.0.post0 intel-extension-for-pytorch==2.1.20+xpu oneccl_bind_pt==2.1.200+xpu intel-extension-for-transformers datasets sentencepiece tiktoken 
neural_speed optimum[openvino] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ fi # If we didn't skip conda, activate the environment From 11c48a0004022e21f12d2ffedc16534bf43acf8b Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 29 Apr 2024 09:11:42 -0400 Subject: [PATCH 0169/2648] fix: security scanner warning noise: error handlers part 2 (#2145) check off a few more error handlers Signed-off-by: Dave Lee --- core/cli/models.go | 6 +++++- core/cli/transcript.go | 8 +++++++- core/cli/tts.go | 8 +++++++- core/startup/config_file_watcher.go | 14 ++++++++++---- core/startup/startup.go | 5 ++++- pkg/functions/functions.go | 12 ++++++++++-- pkg/functions/parse.go | 10 ++++++++-- pkg/model/initializers.go | 17 ++++++++++++++--- pkg/model/loader.go | 5 ++++- pkg/model/process.go | 16 ++++++++++------ tests/integration/stores_test.go | 5 +++-- 11 files changed, 82 insertions(+), 24 deletions(-) diff --git a/core/cli/models.go b/core/cli/models.go index 6615e21d..5bbb60e6 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -64,7 +64,11 @@ func (mi *ModelsInstall) Run(ctx *Context) error { progressbar.OptionClearOnFinish(), ) progressCallback := func(fileName string, current string, total string, percentage float64) { - progressBar.Set(int(percentage * 10)) + v := int(percentage * 10) + err := progressBar.Set(v) + if err != nil { + log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar") + } } err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback) if err != nil { diff --git a/core/cli/transcript.go b/core/cli/transcript.go index 9f36a77c..1f2f779a 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -8,6 +8,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) type TranscriptCMD struct { @@ -41,7 +42,12 @@ func (t *TranscriptCMD) Run(ctx *Context) error { c.Threads = &t.Threads - defer ml.StopAllGRPC() + defer func() { + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("unable to stop all grpc processes") + } + }() tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts) if err != nil { diff --git a/core/cli/tts.go b/core/cli/tts.go index 1d8fd3a3..d4bd2553 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -10,6 +10,7 @@ import ( "github.com/go-skynet/LocalAI/core/backend" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) type TTSCMD struct { @@ -40,7 +41,12 @@ func (t *TTSCMD) Run(ctx *Context) error { } ml := model.NewModelLoader(opts.ModelPath) - defer ml.StopAllGRPC() + defer func() { + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("unable to stop all grpc processes") + } + }() options := config.BackendConfig{} options.SetDefaults() diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 6bbb367f..259446f1 100644 --- a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -31,8 +31,14 @@ func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler handlers: make(map[string]fileHandler), appConfig: appConfig, } - c.Register("api_keys.json", readApiKeysJson(*appConfig), true) - c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) + err := c.Register("api_keys.json", 
readApiKeysJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler") + } + err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler") + } return c } @@ -118,8 +124,8 @@ func (c *configFileHandler) Watch() error { } // TODO: When we institute graceful shutdown, this should be called -func (c *configFileHandler) Stop() { - c.watcher.Close() +func (c *configFileHandler) Stop() error { + return c.watcher.Close() } func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { diff --git a/core/startup/startup.go b/core/startup/startup.go index 17bbf9f5..e5660f4c 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -100,7 +100,10 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - ml.StopAllGRPC() + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("error while stopping all grpc backends") + } }() if options.WatchDog { diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index d75a2ee3..f5e37d75 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -2,6 +2,8 @@ package functions import ( "encoding/json" + + "github.com/rs/zerolog/log" ) type Function struct { @@ -30,8 +32,14 @@ func (f Functions) ToJSONStructure() JSONFunctionStructure { prop := map[string]interface{}{} defsD := map[string]interface{}{} - json.Unmarshal(dat, &prop) - json.Unmarshal(dat2, &defsD) + err := json.Unmarshal(dat, &prop) + if err != nil { + log.Error().Err(err).Msg("error unmarshalling dat") + } + err = json.Unmarshal(dat2, &defsD) + if err != nil { + log.Error().Err(err).Msg("error unmarshalling dat2") + } if js.Defs == nil { js.Defs = defsD } diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 5324e8c6..26312560 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -59,7 +59,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC if multipleResults { ss := []map[string]interface{}{} s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) + err := json.Unmarshal([]byte(s), &ss) + if err != nil { + log.Error().Err(err).Str("escapedLLMResult", s).Msg("multiple results: unable to unmarshal llm result") + } log.Debug().Msgf("Function return: %s %+v", s, ss) for _, s := range ss { @@ -83,7 +86,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC ss := map[string]interface{}{} // This prevent newlines to break JSON parsing for clients s := utils.EscapeNewLines(llmresult) - json.Unmarshal([]byte(s), &ss) + err := json.Unmarshal([]byte(s), &ss) + if err != nil { + log.Error().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result") + } log.Debug().Msgf("Function return: %s %+v", s, ss) // The grammar defines the function name as "function", while OpenAI returns "name" diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5d9808a4..5a65d01f 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -70,7 +70,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string // If no specific model path is set for transformers/HF, set it to the model path for _, env := range []string{"HF_HOME", 
"TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} { if os.Getenv(env) == "" { - os.Setenv(env, ml.ModelPath) + err := os.Setenv(env, ml.ModelPath) + if err != nil { + log.Error().Err(err).Str("name", env).Str("modelPath", ml.ModelPath).Msg("unable to set environment variable to modelPath") + } } } @@ -184,8 +187,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e if o.singleActiveBackend { ml.mu.Lock() log.Debug().Msgf("Stopping all backends except '%s'", o.model) - ml.StopAllExcept(o.model) + err := ml.StopAllExcept(o.model) ml.mu.Unlock() + if err != nil { + log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel") + return nil, err + } + } var backendToConsume string @@ -224,7 +232,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { // If we can have only one backend active, kill all the others (except external backends) if o.singleActiveBackend { log.Debug().Msgf("Stopping all backends except '%s'", o.model) - ml.StopAllExcept(o.model) + err := ml.StopAllExcept(o.model) + if err != nil { + log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing") + } } ml.mu.Unlock() diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 2d6b3acb..8bf9da5a 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -174,7 +174,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress { if !ml.grpcProcesses[s].IsAlive() { log.Debug().Msgf("GRPC Process is not responding: %s", s) // stop and delete the process, this forces to re-load the model and re-create again the service - ml.deleteProcess(s) + err := ml.deleteProcess(s) + if err != nil { + log.Error().Err(err).Str("process", s).Msg("error stopping process") + } return "" } } diff --git a/pkg/model/process.go b/pkg/model/process.go index 08822fd9..ff3b12cc 100644 --- a/pkg/model/process.go +++ b/pkg/model/process.go @@ -1,6 +1,7 @@ package model import ( + "errors" "fmt" "os" "os/signal" @@ -14,8 +15,8 @@ import ( "github.com/rs/zerolog/log" ) -func (ml *ModelLoader) StopAllExcept(s string) { - ml.StopGRPC(func(id string, p *process.Process) bool { +func (ml *ModelLoader) StopAllExcept(s string) error { + return ml.StopGRPC(func(id string, p *process.Process) bool { if id != s { for ml.models[id].GRPC(false, ml.wd).IsBusy() { log.Debug().Msgf("%s busy. 
Waiting.", id) @@ -43,16 +44,19 @@ func includeAllProcesses(_ string, _ *process.Process) bool { return true } -func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) { +func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error { + var err error = nil for k, p := range ml.grpcProcesses { if filter(k, p) { - ml.deleteProcess(k) + e := ml.deleteProcess(k) + err = errors.Join(err, e) } } + return err } -func (ml *ModelLoader) StopAllGRPC() { - ml.StopGRPC(includeAllProcesses) +func (ml *ModelLoader) StopAllGRPC() error { + return ml.StopGRPC(includeAllProcesses) } func (ml *ModelLoader) GetGRPCPID(id string) (int, error) { diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go index 54d0844c..ec67af78 100644 --- a/tests/integration/stores_test.go +++ b/tests/integration/stores_test.go @@ -63,8 +63,9 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs" }) AfterEach(func() { - sl.StopAllGRPC() - err := os.RemoveAll(tmpdir) + err := sl.StopAllGRPC() + Expect(err).ToNot(HaveOccurred()) + err = os.RemoveAll(tmpdir) Expect(err).ToNot(HaveOccurred()) }) From 93ca56086e7b00cc4e72c2cd0222eca061d22671 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 29 Apr 2024 08:17:09 -0500 Subject: [PATCH 0170/2648] update go-tinydream to latest commit (#2182) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 60cd3f24..0069fb8b 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568 # tinydream version -TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293 +TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 export BUILD_TYPE?= export STABLE_BUILD_TYPE?=$(BUILD_TYPE) From ea13863221b7d4ac9dbad636730c2c8599984216 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:17:39 +0200 Subject: [PATCH 0171/2648] models(gallery): add llama3-32k (#2183) Signed-off-by: Ettore Di Giacinto --- gallery/chatml.yaml | 41 +++++++++++++++++++++++++++++++++++++++++ gallery/index.yaml | 19 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 gallery/chatml.yaml diff --git a/gallery/chatml.yaml b/gallery/chatml.yaml new file mode 100644 index 00000000..e27fdab8 --- /dev/null +++ b/gallery/chatml.yaml @@ -0,0 +1,41 @@ +--- +name: "chatml" + +config_file: | + mmap: true + template: + chat_message: | + <|im_start|>{{ .RoleName }} + {{- if .FunctionCall }} + Function call: + {{- else if eq .RoleName "tool" }} + Function response: + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + <|im_end|> + function: | + <|im_start|>system + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + <|im_end|> + {{.Input -}} + <|im_start|>assistant + + chat: | + {{.Input -}} + <|im_start|>assistant + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - <|im_end|> + - diff --git a/gallery/index.yaml b/gallery/index.yaml index 012a1ecb..da0c9c59 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -412,6 +412,25 @@ - filename: dolphin-2.9-llama3-8b-q6_K.gguf sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32 uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf +- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "llama-3-8b-instruct-dpo-v0.3-32k" + license: llama3 + urls: + - https://huggingface.co/MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + context_size: 32768 + parameters: + model: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf + files: + - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf + sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f + uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf ## LLama2 and derivatives ### Start Fimbulvetr - &vicuna-chat From baff5ff8c262744aab2793809a2d764d266ac8bd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:17:47 +0200 Subject: [PATCH 0172/2648] models(gallery): add openvino models (#2184) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 46 ++++++++++++++++++++++++++++++++++++++++++- gallery/openvino.yaml | 12 +++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 gallery/openvino.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index da0c9c59..e510e97e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -778,7 +778,51 @@ - filename: "codellama-7b.Q4_0.gguf" sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" - +### START OpenVINO +- &openvino + url: "github:mudler/LocalAI/gallery/openvino.yaml@master" + name: "openvino-llama-3-8b-instruct-ov-int8" + license: llama3 + urls: + - https://huggingface.co/fakezeta/llama-3-8b-instruct-ov-int8 + overrides: + parameters: + model: fakezeta/llama-3-8b-instruct-ov-int8 + stopwords: + - "<|eot_id|>" + - "<|end_of_text|>" + tags: + - llm + - openvino + - gpu + - llama3 + - cpu +- <<: *openvino + name: "openvino-phi3" + urls: + - https://huggingface.co/fakezeta/Phi-3-mini-128k-instruct-ov-int8 + overrides: + context_size: 131072 + parameters: + model: fakezeta/Phi-3-mini-128k-instruct-ov-int8 + stopwords: + - <|end|> +- <<: *openvino + name: "openvino-starling-lm-7b-beta-openvino-int8" + urls: + - https://huggingface.co/fakezeta/Starling-LM-7B-beta-openvino-int8 + overrides: + context_size: 8192 + parameters: + model: fakezeta/Starling-LM-7B-beta-openvino-int8 +- <<: *openvino + name: "openvino-wizardlm2" + urls: + - https://huggingface.co/fakezeta/Not-WizardLM-2-7B-ov-int8 + overrides: + context_size: 8192 + parameters: + model: fakezeta/Not-WizardLM-2-7B-ov-int8 ### START Embeddings - &sentencentransformers description: | diff --git a/gallery/openvino.yaml b/gallery/openvino.yaml new file mode 100644 
index 00000000..b30b2a85 --- /dev/null +++ b/gallery/openvino.yaml @@ -0,0 +1,12 @@ +--- +name: openvino + +config_file: | + backend: transformers + context_size: 8192 + type: OVModelForCausalLM + template: + use_tokenizer_template: true + stopwords: + - "<|eot_id|>" + - "<|end_of_text|>" From 147440b39b9ed5d0542ea79920662aaddb3d0935 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Apr 2024 18:31:50 +0200 Subject: [PATCH 0173/2648] docs: add reference for concurrent requests Signed-off-by: Ettore Di Giacinto --- docs/content/docs/advanced/advanced-usage.md | 26 +++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index cbf7dba3..085606e5 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -498,4 +498,28 @@ When using the `-core` container image it is possible to prepare the python back ```bash docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core -``` \ No newline at end of file +``` + +### Concurrent requests + +LocalAI supports parallel requests for the backends that supports it. For instance, vLLM and llama.cpp supports parallel requests, and thus LocalAI allows to run multiple requests in parallel. + +In order to enable parallel requests, you have to pass `--parallel-requests` or set the `PARALLEL_REQUEST` to true as environment variable. + +A list of the environment variable that tweaks parallelism is the following: + +``` +### Python backends GRPC max workers +### Default number of workers for GRPC Python backends. +### This actually controls wether a backend can process multiple requests or not. +# PYTHON_GRPC_MAX_WORKERS=1 + +### Define the number of parallel LLAMA.cpp workers (Defaults to 1) +# LLAMACPP_PARALLEL=1 + +### Enable to run parallel requests +# LOCALAI_PARALLEL_REQUESTS=true +``` + +Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests. + From c4f958e11b59534c67ce9a69bff1733ab6817d3e Mon Sep 17 00:00:00 2001 From: Dave Date: Mon, 29 Apr 2024 13:42:37 -0400 Subject: [PATCH 0174/2648] refactor(application): introduce application global state (#2072) * start breaking up the giant channel refactor now that it's better understood - easier to merge bites Signed-off-by: Dave Lee * add concurrency and base64 back in, along with new base64 tests. Signed-off-by: Dave Lee * Automatic rename of whisper.go's Result to TranscriptResult Signed-off-by: Dave Lee * remove pkg/concurrency - significant changes coming in split 2 Signed-off-by: Dave Lee * fix comments Signed-off-by: Dave Lee * add list_model service as another low-risk service to get it out of the way Signed-off-by: Dave Lee * split backend config loader into seperate file from the actual config struct. No changes yet, just reduce cognative load with smaller files of logical blocks Signed-off-by: Dave Lee * rename state.go ==> application.go Signed-off-by: Dave Lee * fix lost import? 
Signed-off-by: Dave Lee --------- Signed-off-by: Dave Lee --- backend/go/transcribe/transcript.go | 4 +- backend/go/transcribe/whisper.go | 2 +- core/application.go | 39 +++ core/backend/transcript.go | 2 +- core/config/backend_config.go | 313 +---------------- core/config/backend_config_loader.go | 317 ++++++++++++++++++ core/http/app.go | 23 -- .../http/endpoints/localai/backend_monitor.go | 4 +- core/http/endpoints/openai/list.go | 52 +-- core/http/routes/localai.go | 6 +- core/http/routes/openai.go | 6 +- core/schema/{whisper.go => transcription.go} | 2 +- core/services/backend_monitor.go | 42 +-- core/services/list_models.go | 72 ++++ core/startup/startup.go | 31 ++ pkg/grpc/backend.go | 2 +- pkg/grpc/base/base.go | 4 +- pkg/grpc/client.go | 4 +- pkg/grpc/embed.go | 4 +- pkg/grpc/interface.go | 2 +- pkg/utils/base64.go | 50 +++ pkg/utils/base64_test.go | 31 ++ 22 files changed, 590 insertions(+), 422 deletions(-) create mode 100644 core/application.go create mode 100644 core/config/backend_config_loader.go rename core/schema/{whisper.go => transcription.go} (90%) create mode 100644 core/services/list_models.go create mode 100644 pkg/utils/base64.go create mode 100644 pkg/utils/base64_test.go diff --git a/backend/go/transcribe/transcript.go b/backend/go/transcribe/transcript.go index 74833e4d..256be71f 100644 --- a/backend/go/transcribe/transcript.go +++ b/backend/go/transcribe/transcript.go @@ -29,8 +29,8 @@ func audioToWav(src, dst string) error { return nil } -func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) { - res := schema.Result{} +func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) { + res := schema.TranscriptionResult{} dir, err := os.MkdirTemp("", "whisper") if err != nil { diff --git a/backend/go/transcribe/whisper.go b/backend/go/transcribe/whisper.go index ac93be01..a9a62d24 100644 --- a/backend/go/transcribe/whisper.go +++ b/backend/go/transcribe/whisper.go @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error { return err } -func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) { +func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) { return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads)) } diff --git a/core/application.go b/core/application.go new file mode 100644 index 00000000..54d3dedf --- /dev/null +++ b/core/application.go @@ -0,0 +1,39 @@ +package core + +import ( + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/pkg/model" +) + +// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy +// Perhaps a proper DI system is worth it in the future, but for now keep things simple. 
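+// It is meant to be populated once at startup, so that request handlers can borrow
+// already-initialized pointers instead of constructing services ad hoc.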
+type Application struct { + + // Application-Level Config + ApplicationConfig *config.ApplicationConfig + // ApplicationState *ApplicationState + + // Core Low-Level Services + BackendConfigLoader *config.BackendConfigLoader + ModelLoader *model.ModelLoader + + // Backend Services + // EmbeddingsBackendService *backend.EmbeddingsBackendService + // ImageGenerationBackendService *backend.ImageGenerationBackendService + // LLMBackendService *backend.LLMBackendService + // TranscriptionBackendService *backend.TranscriptionBackendService + // TextToSpeechBackendService *backend.TextToSpeechBackendService + + // LocalAI System Services + BackendMonitorService *services.BackendMonitorService + GalleryService *services.GalleryService + ListModelsService *services.ListModelsService + LocalAIMetricsService *services.LocalAIMetricsService + // OpenAIService *services.OpenAIService +} + +// TODO [NEXT PR?]: Break up ApplicationConfig. +// Migrate over stuff that is not set via config at all - especially runtime stuff +type ApplicationState struct { +} diff --git a/core/backend/transcript.go b/core/backend/transcript.go index 4c3859df..e620bebd 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -11,7 +11,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" ) -func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) { +func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) { opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(model.WhisperBackend), diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 0d7d0cbf..cb1b7c2a 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -1,23 +1,12 @@ package config import ( - "errors" - "fmt" - "io/fs" "os" - "path/filepath" - "sort" - "strings" - "sync" "github.com/go-skynet/LocalAI/core/schema" "github.com/go-skynet/LocalAI/pkg/downloader" "github.com/go-skynet/LocalAI/pkg/functions" "github.com/go-skynet/LocalAI/pkg/utils" - "github.com/rs/zerolog/log" - "gopkg.in/yaml.v3" - - "github.com/charmbracelet/glamour" ) const ( @@ -140,7 +129,7 @@ type LLMConfig struct { EnforceEager bool `yaml:"enforce_eager"` // vLLM SwapSpace int `yaml:"swap_space"` // vLLM MaxModelLen int `yaml:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` RopeScaling string `yaml:"rope_scaling"` @@ -343,303 +332,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.Debug = &trueV } } - -////// Config Loader //////// - -type BackendConfigLoader struct { - configs map[string]BackendConfig - sync.Mutex -} - -type LoadOptions struct { - debug bool - threads, ctxSize int - f16 bool -} - -func LoadOptionDebug(debug bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.debug = debug - } -} - -func LoadOptionThreads(threads int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.threads = threads - } -} - -func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { - return func(o *LoadOptions) { - o.ctxSize = ctxSize - } -} - -func LoadOptionF16(f16 bool) ConfigLoaderOption { - return func(o *LoadOptions) { - o.f16 = f16 - } -} - -type ConfigLoaderOption func(*LoadOptions) - -func 
(lo *LoadOptions) Apply(options ...ConfigLoaderOption) { - for _, l := range options { - l(lo) - } -} - -// Load a config file for a model -func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - - // Load a config file if present after the model name - cfg := &BackendConfig{ - PredictionOptions: schema.PredictionOptions{ - Model: modelName, - }, - } - - cfgExisting, exists := cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } else { - // Try loading a model config file - modelConfig := filepath.Join(modelPath, modelName+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := cl.LoadBackendConfig( - modelConfig, opts..., - ); err != nil { - return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - cfgExisting, exists = cl.GetBackendConfig(modelName) - if exists { - cfg = &cfgExisting - } - } - } - - cfg.SetDefaults(opts...) - - return cfg, nil -} - -func NewBackendConfigLoader() *BackendConfigLoader { - return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), - } -} -func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { - c := &[]*BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - for _, cc := range *c { - cc.SetDefaults(opts...) - } - - return *c, nil -} - -func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { - lo := &LoadOptions{} - lo.Apply(opts...) - - c := &BackendConfig{} - f, err := os.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, c); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - - c.SetDefaults(opts...) - return c, nil -} - -func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer cm.Unlock() - c, err := ReadBackendConfigFile(file, opts...) - if err != nil { - return fmt.Errorf("cannot load config file: %w", err) - } - - for _, cc := range c { - cm.configs[cc.Name] = *cc - } - return nil -} - -func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { - cl.Lock() - defer cl.Unlock() - c, err := ReadBackendConfig(file, opts...) 
- if err != nil { - return fmt.Errorf("cannot read config file: %w", err) - } - - cl.configs[c.Name] = *c - return nil -} - -func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { - cl.Lock() - defer cl.Unlock() - v, exists := cl.configs[m] - return v, exists -} - -func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { - cl.Lock() - defer cl.Unlock() - var res []BackendConfig - for _, v := range cl.configs { - res = append(res, v) - } - - sort.SliceStable(res, func(i, j int) bool { - return res[i].Name < res[j].Name - }) - - return res -} - -func (cl *BackendConfigLoader) ListBackendConfigs() []string { - cl.Lock() - defer cl.Unlock() - var res []string - for k := range cl.configs { - res = append(res, k) - } - return res -} - -// Preload prepare models if they are not local but url or huggingface repositories -func (cl *BackendConfigLoader) Preload(modelPath string) error { - cl.Lock() - defer cl.Unlock() - - status := func(fileName, current, total string, percent float64) { - utils.DisplayDownloadFunction(fileName, current, total, percent) - } - - log.Info().Msgf("Preloading models from %s", modelPath) - - renderMode := "dark" - if os.Getenv("COLOR") != "" { - renderMode = os.Getenv("COLOR") - } - - glamText := func(t string) { - out, err := glamour.Render(t, renderMode) - if err == nil && os.Getenv("NO_COLOR") == "" { - fmt.Println(out) - } else { - fmt.Println(t) - } - } - - for i, config := range cl.configs { - - // Download files and verify their SHA - for i, file := range config.DownloadFiles { - log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) - - if err := utils.VerifyPath(file.Filename, modelPath); err != nil { - return err - } - // Create file path - filePath := filepath.Join(modelPath, file.Filename) - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { - return err - } - } - - // If the model is an URL, expand it, and download the file - if config.IsModelURL() { - modelFileName := config.ModelFileName() - modelURL := downloader.ConvertURL(config.Model) - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.PredictionOptions.Model = modelFileName - cl.configs[i] = *c - } - - if config.IsMMProjURL() { - modelFileName := config.MMProjFileName() - modelURL := downloader.ConvertURL(config.MMProj) - // check if file exists - if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) - if err != nil { - return err - } - } - - cc := cl.configs[i] - c := &cc - c.MMProj = modelFileName - cl.configs[i] = *c - } - - if cl.configs[i].Name != "" { - glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) - } - if cl.configs[i].Description != "" { - //glamText("**Description**") - glamText(cl.configs[i].Description) - } - if cl.configs[i].Usage != "" { - //glamText("**Usage**") - glamText(cl.configs[i].Usage) - } - } - return nil -} - -// LoadBackendConfigsFromPath reads all the configurations of the models from a path -// (non-recursive) -func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { - cm.Lock() - defer 
cm.Unlock() - entries, err := os.ReadDir(path) - if err != nil { - return err - } - files := make([]fs.FileInfo, 0, len(entries)) - for _, entry := range entries { - info, err := entry.Info() - if err != nil { - return err - } - files = append(files, info) - } - for _, file := range files { - // Skip templates, YAML and .keep files - if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || - strings.HasPrefix(file.Name(), ".") { - continue - } - c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) - if err == nil { - cm.configs[c.Name] = *c - } - } - - return nil -} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go new file mode 100644 index 00000000..83b66740 --- /dev/null +++ b/core/config/backend_config_loader.go @@ -0,0 +1,317 @@ +package config + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/charmbracelet/glamour" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/downloader" + "github.com/go-skynet/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" + "gopkg.in/yaml.v3" +) + +type BackendConfigLoader struct { + configs map[string]BackendConfig + sync.Mutex +} + +type LoadOptions struct { + debug bool + threads, ctxSize int + f16 bool +} + +func LoadOptionDebug(debug bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.debug = debug + } +} + +func LoadOptionThreads(threads int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.threads = threads + } +} + +func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { + return func(o *LoadOptions) { + o.ctxSize = ctxSize + } +} + +func LoadOptionF16(f16 bool) ConfigLoaderOption { + return func(o *LoadOptions) { + o.f16 = f16 + } +} + +type ConfigLoaderOption func(*LoadOptions) + +func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) { + for _, l := range options { + l(lo) + } +} + +// Load a config file for a model +func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + + // Load a config file if present after the model name + cfg := &BackendConfig{ + PredictionOptions: schema.PredictionOptions{ + Model: modelName, + }, + } + + cfgExisting, exists := cl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } else { + // Try loading a model config file + modelConfig := filepath.Join(modelPath, modelName+".yaml") + if _, err := os.Stat(modelConfig); err == nil { + if err := cl.LoadBackendConfig( + modelConfig, opts..., + ); err != nil { + return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) + } + cfgExisting, exists = cl.GetBackendConfig(modelName) + if exists { + cfg = &cfgExisting + } + } + } + + cfg.SetDefaults(opts...) + + return cfg, nil +} + +func NewBackendConfigLoader() *BackendConfigLoader { + return &BackendConfigLoader{ + configs: make(map[string]BackendConfig), + } +} +func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) { + c := &[]*BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + for _, cc := range *c { + cc.SetDefaults(opts...) 
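+		// applying defaults per entry keeps each parsed config fully
+		// populated before it is stored for later lookups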
+ } + + return *c, nil +} + +func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) { + lo := &LoadOptions{} + lo.Apply(opts...) + + c := &BackendConfig{} + f, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, c); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + + c.SetDefaults(opts...) + return c, nil +} + +func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + c, err := ReadBackendConfigFile(file, opts...) + if err != nil { + return fmt.Errorf("cannot load config file: %w", err) + } + + for _, cc := range c { + cm.configs[cc.Name] = *cc + } + return nil +} + +func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error { + cl.Lock() + defer cl.Unlock() + c, err := ReadBackendConfig(file, opts...) + if err != nil { + return fmt.Errorf("cannot read config file: %w", err) + } + + cl.configs[c.Name] = *c + return nil +} + +func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) { + cl.Lock() + defer cl.Unlock() + v, exists := cl.configs[m] + return v, exists +} + +func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { + cl.Lock() + defer cl.Unlock() + var res []BackendConfig + for _, v := range cl.configs { + res = append(res, v) + } + + sort.SliceStable(res, func(i, j int) bool { + return res[i].Name < res[j].Name + }) + + return res +} + +func (cl *BackendConfigLoader) ListBackendConfigs() []string { + cl.Lock() + defer cl.Unlock() + var res []string + for k := range cl.configs { + res = append(res, k) + } + return res +} + +// Preload prepare models if they are not local but url or huggingface repositories +func (cl *BackendConfigLoader) Preload(modelPath string) error { + cl.Lock() + defer cl.Unlock() + + status := func(fileName, current, total string, percent float64) { + utils.DisplayDownloadFunction(fileName, current, total, percent) + } + + log.Info().Msgf("Preloading models from %s", modelPath) + + renderMode := "dark" + if os.Getenv("COLOR") != "" { + renderMode = os.Getenv("COLOR") + } + + glamText := func(t string) { + out, err := glamour.Render(t, renderMode) + if err == nil && os.Getenv("NO_COLOR") == "" { + fmt.Println(out) + } else { + fmt.Println(t) + } + } + + for i, config := range cl.configs { + + // Download files and verify their SHA + for i, file := range config.DownloadFiles { + log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename) + + if err := utils.VerifyPath(file.Filename, modelPath); err != nil { + return err + } + // Create file path + filePath := filepath.Join(modelPath, file.Filename) + + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { + return err + } + } + + // If the model is an URL, expand it, and download the file + if config.IsModelURL() { + modelFileName := config.ModelFileName() + modelURL := downloader.ConvertURL(config.Model) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.PredictionOptions.Model = modelFileName + cl.configs[i] = *c + } + + if config.IsMMProjURL() { + modelFileName := 
config.MMProjFileName() + modelURL := downloader.ConvertURL(config.MMProj) + // check if file exists + if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { + err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + if err != nil { + return err + } + } + + cc := cl.configs[i] + c := &cc + c.MMProj = modelFileName + cl.configs[i] = *c + } + + if cl.configs[i].Name != "" { + glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name)) + } + if cl.configs[i].Description != "" { + //glamText("**Description**") + glamText(cl.configs[i].Description) + } + if cl.configs[i].Usage != "" { + //glamText("**Usage**") + glamText(cl.configs[i].Usage) + } + } + return nil +} + +// LoadBackendConfigsFromPath reads all the configurations of the models from a path +// (non-recursive) +func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error { + cm.Lock() + defer cm.Unlock() + entries, err := os.ReadDir(path) + if err != nil { + return err + } + files := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return err + } + files = append(files, info) + } + for _, file := range files { + // Skip templates, YAML and .keep files + if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") || + strings.HasPrefix(file.Name(), ".") { + continue + } + c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...) + if err == nil { + cm.configs[c.Name] = *c + } + } + + return nil +} diff --git a/core/http/app.go b/core/http/app.go index bd740410..080535a4 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -1,9 +1,7 @@ package http import ( - "encoding/json" "errors" - "os" "strings" "github.com/go-skynet/LocalAI/pkg/utils" @@ -124,20 +122,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi return c.Next() } - // Check for api_keys.json file - fileContent, err := os.ReadFile("api_keys.json") - if err == nil { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err != nil { - return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"}) - } - - // Add file keys to options.ApiKeys - appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...) 
- } - if len(appConfig.ApiKeys) == 0 { return c.Next() } @@ -174,13 +158,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } - // Make sure directories exists - os.MkdirAll(appConfig.ImageDir, 0750) - os.MkdirAll(appConfig.AudioDir, 0750) - os.MkdirAll(appConfig.UploadDir, 0750) - os.MkdirAll(appConfig.ConfigsDir, 0750) - os.MkdirAll(appConfig.ModelPath, 0750) - // Load config jsons utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index 8c7a664a..dac20388 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -6,7 +6,7 @@ import ( "github.com/gofiber/fiber/v2" ) -func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) @@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error } } -func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error { +func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { input := new(schema.BackendMonitorRequest) // Get input data from the request body diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 04e611a2..2caea96b 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -1,63 +1,23 @@ package openai import ( - "regexp" - - "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/schema" - model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/core/services" "github.com/gofiber/fiber/v2" ) -func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, err := ml.ListModels() - if err != nil { - return err - } - var mm map[string]interface{} = map[string]interface{}{} - - dataModels := []schema.OpenAIModel{} - - var filterFn func(name string) bool + // If blank, no filter is applied. filter := c.Query("filter") - // If filter is not specified, do not filter the list by model name - if filter == "" { - filterFn = func(_ string) bool { return true } - } else { - // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn - rxp, err := regexp.Compile(filter) - if err != nil { - return err - } - filterFn = func(name string) bool { - return rxp.MatchString(name) - } - } - // By default, exclude any loose files that are already referenced by a configuration file. 
excludeConfigured := c.QueryBool("excludeConfigured", true) - // Start with the known configurations - for _, c := range cl.GetAllBackendConfigs() { - if excludeConfigured { - mm[c.Model] = nil - } - - if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) - } + dataModels, err := lms.ListModels(filter, excludeConfigured) + if err != nil { + return err } - - // Then iterate through the loose files: - for _, m := range models { - // And only adds them if they shouldn't be skipped. - if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - } - return c.JSON(struct { Object string `json:"object"` Data []schema.OpenAIModel `json:"data"` diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 138babbe..a5099d60 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -52,9 +52,9 @@ func RegisterLocalAIRoutes(app *fiber.App, app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint()) // Experimental Backend Statistics Module - backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now - app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor)) - app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor)) + backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now + app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService)) + app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService)) app.Get("/version", auth, func(c *fiber.Ctx) error { return c.JSON(struct { diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index c51ccdcb..74f20175 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -4,6 +4,7 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/http/endpoints/openai" + "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" ) @@ -81,6 +82,7 @@ func RegisterOpenAIRoutes(app *fiber.App, } // models - app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) + tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance. 
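+	// A minimal sketch (illustrative, not part of this patch) of the new seam:
+	// the endpoint no longer reaches into the loaders itself, so the listing
+	// logic can be exercised directly, e.g. from a test:
+	//
+	//	dataModels, err := tmpLMS.ListModels("gpt.*", true)
+	//
+	// using the ListModels(filter, excludeConfigured) signature introduced in
+	// core/services/list_models.go.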
+ app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS)) + app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS)) } diff --git a/core/schema/whisper.go b/core/schema/transcription.go similarity index 90% rename from core/schema/whisper.go rename to core/schema/transcription.go index 41413c1f..fe1799fa 100644 --- a/core/schema/whisper.go +++ b/core/schema/transcription.go @@ -10,7 +10,7 @@ type Segment struct { Tokens []int `json:"tokens"` } -type Result struct { +type TranscriptionResult struct { Segments []Segment `json:"segments"` Text string `json:"text"` } diff --git a/core/services/backend_monitor.go b/core/services/backend_monitor.go index 979a67a3..4e993ed9 100644 --- a/core/services/backend_monitor.go +++ b/core/services/backend_monitor.go @@ -15,22 +15,22 @@ import ( gopsutil "github.com/shirou/gopsutil/v3/process" ) -type BackendMonitor struct { - configLoader *config.BackendConfigLoader - modelLoader *model.ModelLoader - options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. +type BackendMonitorService struct { + backendConfigLoader *config.BackendConfigLoader + modelLoader *model.ModelLoader + options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name. } -func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor { - return BackendMonitor{ - configLoader: configLoader, - modelLoader: modelLoader, - options: appConfig, +func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService { + return &BackendMonitorService{ + modelLoader: modelLoader, + backendConfigLoader: configLoader, + options: appConfig, } } -func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) { - config, exists := bm.configLoader.GetBackendConfig(modelName) +func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) { + config, exists := bms.backendConfigLoader.GetBackendConfig(modelName) var backendId string if exists { backendId = config.Model @@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string return backendId, nil } -func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { - config, exists := bm.configLoader.GetBackendConfig(model) +func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) { + config, exists := bms.backendConfigLoader.GetBackendConfig(model) var backend string if exists { backend = config.Model @@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe backend = fmt.Sprintf("%s.bin", backend) } - pid, err := bm.modelLoader.GetGRPCPID(backend) + pid, err := bms.modelLoader.GetGRPCPID(backend) if err != nil { log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid") @@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe }, nil } -func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) { + 
backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return nil, err } - modelAddr := bm.modelLoader.CheckIsLoaded(backendId) + modelAddr := bms.modelLoader.CheckIsLoaded(backendId) if modelAddr == "" { return nil, fmt.Errorf("backend %s is not currently loaded", backendId) } @@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO()) if rpcErr != nil { log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error()) - val, slbErr := bm.SampleLocalBackendProcess(backendId) + val, slbErr := bms.SampleLocalBackendProcess(backendId) if slbErr != nil { return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error()) } @@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse return status, nil } -func (bm BackendMonitor) ShutdownModel(modelName string) error { - backendId, err := bm.getModelLoaderIDFromModelName(modelName) +func (bms BackendMonitorService) ShutdownModel(modelName string) error { + backendId, err := bms.getModelLoaderIDFromModelName(modelName) if err != nil { return err } - return bm.modelLoader.ShutdownModel(backendId) + return bms.modelLoader.ShutdownModel(backendId) } diff --git a/core/services/list_models.go b/core/services/list_models.go new file mode 100644 index 00000000..a21e6faf --- /dev/null +++ b/core/services/list_models.go @@ -0,0 +1,72 @@ +package services + +import ( + "regexp" + + "github.com/go-skynet/LocalAI/core/config" + "github.com/go-skynet/LocalAI/core/schema" + "github.com/go-skynet/LocalAI/pkg/model" +) + +type ListModelsService struct { + bcl *config.BackendConfigLoader + ml *model.ModelLoader + appConfig *config.ApplicationConfig +} + +func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { + return &ListModelsService{ + bcl: bcl, + ml: ml, + appConfig: appConfig, + } +} + +func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { + + models, err := lms.ml.ListModels() + if err != nil { + return nil, err + } + + var mm map[string]interface{} = map[string]interface{}{} + + dataModels := []schema.OpenAIModel{} + + var filterFn func(name string) bool + + // If filter is not specified, do not filter the list by model name + if filter == "" { + filterFn = func(_ string) bool { return true } + } else { + // If filter _IS_ specified, we compile it to a regex which is used to create the filterFn + rxp, err := regexp.Compile(filter) + if err != nil { + return nil, err + } + filterFn = func(name string) bool { + return rxp.MatchString(name) + } + } + + // Start with the known configurations + for _, c := range lms.bcl.GetAllBackendConfigs() { + if excludeConfigured { + mm[c.Model] = nil + } + + if filterFn(c.Name) { + dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + } + } + + // Then iterate through the loose files: + for _, m := range models { + // And only adds them if they shouldn't be skipped. 
+ if _, exists := mm[m]; !exists && filterFn(m) { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + } + + return dataModels, nil +} diff --git a/core/startup/startup.go b/core/startup/startup.go index e5660f4c..672aee15 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/go-skynet/LocalAI/core" "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/services" "github.com/go-skynet/LocalAI/internal" @@ -133,3 +134,33 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode log.Info().Msg("core/startup process completed!") return cl, ml, options, nil } + +// In Lieu of a proper DI framework, this function wires up the Application manually. +// This is in core/startup rather than core/state.go to keep package references clean! +func createApplication(appConfig *config.ApplicationConfig) *core.Application { + app := &core.Application{ + ApplicationConfig: appConfig, + BackendConfigLoader: config.NewBackendConfigLoader(), + ModelLoader: model.NewModelLoader(appConfig.ModelPath), + } + + var err error + + // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + + app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath) + app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) + + app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() + if err != nil { + log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") + } + + return app +} diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index bef9e186..b5745db5 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -41,7 +41,7 @@ type Backend interface { PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) - AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) + AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) Status(ctx context.Context) (*pb.StatusResponse, error) diff --git a/pkg/grpc/base/base.go b/pkg/grpc/base/base.go index 
0af5d94f..c0b4bc34 100644
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
 	return fmt.Errorf("unimplemented")
 }
 
-func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) {
-	return schema.Result{}, fmt.Errorf("unimplemented")
+func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) {
+	return schema.TranscriptionResult{}, fmt.Errorf("unimplemented")
 }
 
 func (llm *Base) TTS(*pb.TTSRequest) error {
diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go
index fc4a12fa..06ccc1b4 100644
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
 	return client.TTS(ctx, in, opts...)
 }
 
-func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
+func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
@@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
 	if err != nil {
 		return nil, err
 	}
-	tresult := &schema.Result{}
+	tresult := &schema.TranscriptionResult{}
 	for _, s := range res.Segments {
 		tks := []int{}
 		for _, t := range s.Tokens {
diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go
index 694e83b0..d2038759 100644
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
 	return e.s.TTS(ctx, in)
 }
 
-func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
+func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
 	r, err := e.s.AudioTranscription(ctx, in)
 	if err != nil {
 		return nil, err
 	}
-	tr := &schema.Result{}
+	tr := &schema.TranscriptionResult{}
 	for _, s := range r.Segments {
 		var tks []int
 		for _, t := range s.Tokens {
diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go
index 4d06544d..aa7a3fbc 100644
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -15,7 +15,7 @@ type LLM interface {
 	Load(*pb.ModelOptions) error
 	Embeddings(*pb.PredictOptions) ([]float32, error)
 	GenerateImage(*pb.GenerateImageRequest) error
-	AudioTranscription(*pb.TranscriptRequest) (schema.Result, error)
+	AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
 	TTS(*pb.TTSRequest) error
 	TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
 	Status() (pb.StatusResponse, error)
diff --git a/pkg/utils/base64.go b/pkg/utils/base64.go
new file mode 100644
index 00000000..977156e9
--- /dev/null
+++ b/pkg/utils/base64.go
@@ -0,0 +1,50 @@
+package utils
+
+import (
+	"encoding/base64"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+var base64DownloadClient http.Client = http.Client{
+	Timeout: 30 * time.Second,
+}
+
+// This function checks whether the string is a URL; if it is, it downloads the image into memory,
+// encodes it in base64 and returns the base64 string.
+
+// This may look weird down in pkg/utils while it is currently only used in core/config
+//
+// but I believe it may be useful for MQTT as well in the near future, so I'm
+// extracting it while I'm thinking of it.
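+//
+// A minimal usage sketch (illustrative only; the URL is a placeholder, not
+// part of this patch):
+//
+//	b64, err := GetImageURLAsBase64("https://example.com/image.jpg")
+//	if err != nil {
+//		// handle the error
+//	}
+//	_ = b64 // bare base64 payload, with any data-url prefix stripped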
+func GetImageURLAsBase64(s string) (string, error) { + if strings.HasPrefix(s, "http") { + // download the image + resp, err := base64DownloadClient.Get(s) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // read the image data into memory + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // encode the image data in base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // return the base64 string + return encoded, nil + } + + // if the string instead is prefixed with "data:image/jpeg;base64,", drop it + if strings.HasPrefix(s, "data:image/jpeg;base64,") { + return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil + } + return "", fmt.Errorf("not valid string") +} \ No newline at end of file diff --git a/pkg/utils/base64_test.go b/pkg/utils/base64_test.go new file mode 100644 index 00000000..28a09d17 --- /dev/null +++ b/pkg/utils/base64_test.go @@ -0,0 +1,31 @@ +package utils_test + +import ( + . "github.com/go-skynet/LocalAI/pkg/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("utils/base64 tests", func() { + It("GetImageURLAsBase64 can strip data url prefixes", func() { + // This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes. + input := "data:image/jpeg;base64,FOO" + b64, err := GetImageURLAsBase64(input) + Expect(err).To(BeNil()) + Expect(b64).To(Equal("FOO")) + }) + It("GetImageURLAsBase64 returns an error for bogus data", func() { + input := "FOO" + b64, err := GetImageURLAsBase64(input) + Expect(b64).To(Equal("")) + Expect(err).ToNot(BeNil()) + Expect(err).To(MatchError("not valid string")) + }) + It("GetImageURLAsBase64 can actually download images and calculates something", func() { + // This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before... + input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg" + b64, err := GetImageURLAsBase64(input) + Expect(err).To(BeNil()) + Expect(b64).ToNot(BeNil()) + }) +}) From 53c3842bc238ef74a24329ac7d8f7d68bfd4c7c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 21:12:37 +0000 Subject: [PATCH 0175/2648] build(deps): bump dependabot/fetch-metadata from 2.0.0 to 2.1.0 (#2186) Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.0.0 to 2.1.0. - [Release notes](https://github.com/dependabot/fetch-metadata/releases) - [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.0.0...v2.1.0) --- updated-dependencies: - dependency-name: dependabot/fetch-metadata dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 8e32aee1..c5b8f4aa 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v2.0.0 + uses: dependabot/fetch-metadata@v2.1.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true From 52a268c38cf5c9efe2f7a2db852ce163382f44f2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 29 Apr 2024 23:36:30 +0200 Subject: [PATCH 0176/2648] :arrow_up: Update ggerganov/llama.cpp (#2189) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0069fb8b..0f26e6ee 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac +CPPLLAMA_VERSION?=b8c1476e44cc1f3a1811613f65251cf779067636 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5fd46175dcb0698ac4f480fd278374e367517389 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:40:50 -0500 Subject: [PATCH 0177/2648] fix: ensure GNUMake jobserver is passed through to whisper.cpp build (#2187) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f26e6ee..8b44374a 100644 --- a/Makefile +++ b/Makefile @@ -240,7 +240,7 @@ sources/whisper.cpp: cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp - cd sources/whisper.cpp && make libwhisper.a + cd sources/whisper.cpp && $(MAKE) libwhisper.a get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream From 29d7812344fe3b2501817215adda42a53790c876 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Apr 2024 00:16:04 +0200 Subject: [PATCH 0178/2648] :arrow_up: Update ggerganov/whisper.cpp (#2188) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8b44374a..0096d3f2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=22b6598cc9f1454567efa0d816fdc57637243999 +WHISPER_CPP_VERSION?=8fac6455ffeb0a0950a84e790ddb74f7290d33c4 # bert.cpp version BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d From 3754f154eea8c246e2afb24f8c00d90f3f6b45e6 Mon Sep 17 00:00:00 2001 From: cryptk 
<421501+cryptk@users.noreply.github.com>
Date: Tue, 30 Apr 2024 03:12:19 -0500
Subject: [PATCH 0179/2648] feat: organize Dockerfile into distinct sections (#2181)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
---
 Dockerfile | 124 ++++++++++++++++++++++++-----------------------------
 1 file changed, 55 insertions(+), 69 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 1c4e24fc..4f2a73c2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,19 +2,15 @@ ARG IMAGE_TYPE=extras
 ARG BASE_IMAGE=ubuntu:22.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 
-# extras or core
+# The requirements-core target is common to all images. Nothing should be placed in it unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
 ARG GO_VERSION=1.21.7
-ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=7
 ARG TARGETARCH
 ARG TARGETVARIANT
 
-ENV BUILD_TYPE=${BUILD_TYPE}
 ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"

 ARG GO_TAGS="stablediffusion tinydream tts"
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
+        build-essential \
         ca-certificates \
+        cmake \
         curl \
+        git \
         python3-pip \
+        python-is-python3 \
         unzip && \
     apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --upgrade pip
 
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
@@ -47,25 +48,6 @@ RUN update-ca-certificates
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
-# CuBLAS requirements
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        software-properties-common && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-    dpkg -i cuda-keyring_1.1-1_all.deb && \
-    rm -f cuda-keyring_1.1-1_all.deb && \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
-    ; fi
-
 # Cuda
 ENV PATH /usr/local/cuda/bin:${PATH}
 
@@ -91,6 +73,7 @@ RUN test -n "$TARGETARCH" \
 ###################################
 ###################################
 
+# The requirements-extras target is for any builds with IMAGE_TYPE=extras. Nothing should be placed in it unless every IMAGE_TYPE=extras build will use it.
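+# As an illustration (not part of this patch), the stages defined in this file
+# are selected through its build-args, e.g.:
+#   docker build --build-arg IMAGE_TYPE=core .
+#   docker build --build-arg IMAGE_TYPE=extras --build-arg BUILD_TYPE=cublas .
+# IMAGE_TYPE picks requirements-core or requirements-extras, and BUILD_TYPE
+# drives the requirements-drivers stage introduced further down.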
 FROM requirements-core AS requirements-extras
 
 RUN apt-get update && \
@@ -107,12 +90,6 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        python3-pip && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip install --upgrade pip
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 
 RUN apt-get update && \
@@ -122,13 +99,52 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN if [ ! -e /usr/bin/python ]; then \
-    ln -s /usr/bin/python3 /usr/bin/python \
+###################################
+###################################
+
+# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# CuBLAS requirements
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        software-properties-common && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+# If we are building with clblas support, we need the libraries for the builds
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libclblast-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* \
     ; fi
 
 ###################################
 ###################################
 
+# The grpc target does one thing: it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
+# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
 FROM ${GRPC_BASE_IMAGE} AS grpc
 
 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 
 RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 
 ###################################
 ###################################
 
-FROM requirements-${IMAGE_TYPE} AS builder
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM requirements-drivers AS builder
 
 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
@@ -181,25 +199,8 @@ COPY . .
 COPY .git .
RUN echo "GO_TAGS: $GO_TAGS" -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - RUN make prepare -# If we are building with clblas support, we need the libraries for the builds -RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - libclblast-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* \ - ; fi - # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below # but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only # here so that we can generate the grpc code for the stablediffusion build @@ -225,7 +226,9 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ ################################### ################################### -FROM requirements-${IMAGE_TYPE} +# This is the final target. The result of this target will be the image uploaded to the registry. +# If you cannot find a more suitable place for an addition, this layer is a suitable place for it. +FROM requirements-drivers ARG FFMPEG ARG BUILD_TYPE @@ -253,23 +256,6 @@ RUN if [ "${FFMPEG}" = "true" ]; then \ rm -rf /var/lib/apt/lists/* \ ; fi -# Add OpenCL -RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - libclblast1 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* \ - ; fi - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - WORKDIR /build # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself From e38610e5215508ea7399f3dd6307bd43fc9a585e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Tue, 30 Apr 2024 10:13:04 +0200 Subject: [PATCH 0180/2648] feat: OpenVINO acceleration for embeddings in transformer backend (#2190) OpenVINO acceleration for embeddings New argument type: OVModelForFeatureExtraction --- .../python/transformers/transformers_server.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index a27c24da..93b2ce25 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, device=device_map) self.OV = True + elif request.Type == "OVModelForFeatureExtraction": + from optimum.intel.openvino import OVModelForFeatureExtraction + from openvino.runtime import Core + + if "GPU" in Core().available_devices: + device_map="GPU" + else: + device_map="CPU" + self.model = OVModelForFeatureExtraction.from_pretrained(model_name, + compile=True, + trust_remote_code=request.TrustRemoteCode, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + export=True, + device=device_map) + self.OV = True else: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, From f7aabf1b504a6f6c471f574933144821b04ccf64 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:12:15 -0500 Subject: [PATCH 0181/2648] fix: bring everything onto the same GRPC version to fix tests (#2199) fix: 
more places where we are installing grpc that need a version specified fix: attempt to fix metal tests fix: metal/brew is forcing an update, they don't have 1.58 available anymore Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/image_build.yml | 2 +- .github/workflows/release.yaml | 2 +- .github/workflows/test-extra.yml | 22 +++++++++---------- .github/workflows/test.yml | 4 ++-- backend/python/autogptq/autogptq.yml | 2 +- .../transformers/transformers-nvidia.yml | 2 +- .../transformers/transformers-rocm.yml | 2 +- .../common-env/transformers/transformers.yml | 2 +- backend/python/diffusers/diffusers-rocm.yml | 2 +- backend/python/diffusers/diffusers.yml | 2 +- backend/python/diffusers/install.sh | 4 ++-- backend/python/exllama/exllama.yml | 2 +- backend/python/exllama2/exllama2.yml | 2 +- backend/python/parler-tts/parler-nvidia.yml | 2 +- backend/python/parler-tts/parler.yml | 2 +- backend/python/vall-e-x/ttsvalle.yml | 2 +- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index deda6084..b52a137c 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -84,7 +84,7 @@ jobs: build-args: | GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 + GRPC_VERSION=v1.63.0 context: . file: ./Dockerfile cache-to: type=gha,ignore-error=true diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 7d60d23a..4b5ebecd 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -218,7 +218,7 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.58.0 + GRPC_VERSION=v1.63.0 MAKEFLAGS=${{ inputs.makeflags }} context: . 
file: ./Dockerfile diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index dc887fc1..364307f1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -5,7 +5,7 @@ on: - pull_request env: - GRPC_VERSION: v1.58.0 + GRPC_VERSION: v1.63.0 permissions: contents: write diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index f9476d4d..1bd342e6 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -34,7 +34,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -64,7 +64,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -95,7 +95,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -125,7 +125,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -155,7 +155,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -185,7 +185,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true @@ -217,7 +217,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true @@ -289,7 +289,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true @@ -322,7 +322,7 @@ jobs: # sudo apt-get install -y conda # sudo apt-get install -y ca-certificates cmake curl patch python3-pip # sudo apt-get install -y libopencv-dev - # pip install --user grpcio-tools + # pip install --user grpcio-tools==1.63.0 # sudo rm -rfv /usr/bin/conda || true # - name: Test vllm # run: | @@ -349,7 +349,7 @@ jobs: sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch python3-pip sudo apt-get install -y libopencv-dev - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true - name: Test vall-e-x run: | @@ -376,7 +376,7 @@ jobs: sudo apt-get update && \ sudo apt-get install -y conda sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip - pip install --user grpcio-tools + pip 
install --user grpcio-tools==1.63.0 sudo rm -rfv /usr/bin/conda || true - name: Test coqui diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f50479e1..6fa003b3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ on: - '*' env: - GRPC_VERSION: v1.58.0 + GRPC_VERSION: v1.63.0 concurrency: group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} @@ -203,7 +203,7 @@ jobs: - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc - pip install --user grpcio-tools + pip install --user grpcio-tools==1.63.0 - name: Test run: | export C_INCLUDE_PATH=/usr/local/include diff --git a/backend/python/autogptq/autogptq.yml b/backend/python/autogptq/autogptq.yml index d22b354e..1d11c998 100644 --- a/backend/python/autogptq/autogptq.yml +++ b/backend/python/autogptq/autogptq.yml @@ -41,7 +41,7 @@ dependencies: - filelock==3.12.4 - frozenlist==1.4.0 - fsspec==2023.6.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub==0.16.4 - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers-nvidia.yml b/backend/python/common-env/transformers/transformers-nvidia.yml index 16e494c5..cf9f2eab 100644 --- a/backend/python/common-env/transformers/transformers-nvidia.yml +++ b/backend/python/common-env/transformers/transformers-nvidia.yml @@ -47,7 +47,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers-rocm.yml b/backend/python/common-env/transformers/transformers-rocm.yml index cdefcc27..3fcc407d 100644 --- a/backend/python/common-env/transformers/transformers-rocm.yml +++ b/backend/python/common-env/transformers/transformers-rocm.yml @@ -48,7 +48,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - idna==3.4 - jinja2==3.1.2 diff --git a/backend/python/common-env/transformers/transformers.yml b/backend/python/common-env/transformers/transformers.yml index 5f4e85b9..4cc66b11 100644 --- a/backend/python/common-env/transformers/transformers.yml +++ b/backend/python/common-env/transformers/transformers.yml @@ -47,7 +47,7 @@ dependencies: - frozenlist==1.4.0 - fsspec==2023.6.0 - funcy==2.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub - humanfriendly==10.0 - idna==3.4 diff --git a/backend/python/diffusers/diffusers-rocm.yml b/backend/python/diffusers/diffusers-rocm.yml index 97b2ce0f..fc1ad08c 100644 --- a/backend/python/diffusers/diffusers-rocm.yml +++ b/backend/python/diffusers/diffusers-rocm.yml @@ -34,7 +34,7 @@ dependencies: - diffusers==0.24.0 - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub>=0.19.4 - idna==3.4 - importlib-metadata==6.8.0 diff --git a/backend/python/diffusers/diffusers.yml b/backend/python/diffusers/diffusers.yml index d5d2913e..60c28db9 100644 --- a/backend/python/diffusers/diffusers.yml +++ b/backend/python/diffusers/diffusers.yml @@ -32,7 +32,7 @@ dependencies: - diffusers==0.24.0 - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - huggingface-hub>=0.19.4 - idna==3.4 - importlib-metadata==6.8.0 diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index d83ec0be..0b6607dc 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -31,8 +31,8 @@ if [ -d 
"/opt/intel" ]; then --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ pip install google-api-python-client \ - grpcio \ - grpcio-tools \ + grpcio==1.63.0 \ + grpcio-tools==1.63.0 \ diffusers==0.24.0 \ transformers>=4.25.1 \ accelerate \ diff --git a/backend/python/exllama/exllama.yml b/backend/python/exllama/exllama.yml index 0a30ee91..80f52af5 100644 --- a/backend/python/exllama/exllama.yml +++ b/backend/python/exllama/exllama.yml @@ -27,7 +27,7 @@ dependencies: - pip: - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - jinja2==3.1.2 - markupsafe==2.1.3 - mpmath==1.3.0 diff --git a/backend/python/exllama2/exllama2.yml b/backend/python/exllama2/exllama2.yml index d9060312..678d36a5 100644 --- a/backend/python/exllama2/exllama2.yml +++ b/backend/python/exllama2/exllama2.yml @@ -27,7 +27,7 @@ dependencies: - pip: - filelock==3.12.4 - fsspec==2023.9.2 - - grpcio==1.59.0 + - grpcio==1.63.0 - markupsafe==2.1.3 - mpmath==1.3.0 - networkx==3.1 diff --git a/backend/python/parler-tts/parler-nvidia.yml b/backend/python/parler-tts/parler-nvidia.yml index ed925e94..28ffd14c 100644 --- a/backend/python/parler-tts/parler-nvidia.yml +++ b/backend/python/parler-tts/parler-nvidia.yml @@ -26,7 +26,7 @@ dependencies: - zlib=1.2.13=h5eee18b_0 - pip: - accelerate>=0.11.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - numpy==1.26.0 - nvidia-cublas-cu12==12.1.3.1 - nvidia-cuda-cupti-cu12==12.1.105 diff --git a/backend/python/parler-tts/parler.yml b/backend/python/parler-tts/parler.yml index fd0c3cb6..a3028fe1 100644 --- a/backend/python/parler-tts/parler.yml +++ b/backend/python/parler-tts/parler.yml @@ -27,7 +27,7 @@ dependencies: - pip: - accelerate>=0.11.0 - numpy==1.26.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - torch==2.1.0 - transformers>=4.34.0 - descript-audio-codec diff --git a/backend/python/vall-e-x/ttsvalle.yml b/backend/python/vall-e-x/ttsvalle.yml index e235bf4e..09dbd946 100644 --- a/backend/python/vall-e-x/ttsvalle.yml +++ b/backend/python/vall-e-x/ttsvalle.yml @@ -42,7 +42,7 @@ dependencies: - future==0.18.3 - gradio==3.47.1 - gradio-client==0.6.0 - - grpcio==1.59.0 + - grpcio==1.63.0 - h11==0.14.0 - httpcore==0.18.0 - httpx==0.25.0 From 970cb3a2196d426dd847a25de69edbbce48c0c42 Mon Sep 17 00:00:00 2001 From: Chris Jowett <421501+cryptk@users.noreply.github.com> Date: Tue, 30 Apr 2024 11:22:44 -0500 Subject: [PATCH 0182/2648] chore: update go-stablediffusion to latest commit with Make jobserver fix Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0096d3f2..bba03c90 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759 # stablediffusion version -STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568 +STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f # tinydream version TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 From cd31f8d865031be56c8eb3843e4ba8fd45d0431c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Apr 2024 23:24:13 +0200 Subject: [PATCH 0183/2648] models(gallery): add lexifun (#2193) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e510e97e..cbd51a76 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -314,6 +314,26 @@ - 
filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf
+      sha256: 9e98cd2672f716a0872912fdc4877969efd14d6f682f28e156f8591591c00d9c
+      uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf
+- <<: *llama3
+  name: "llama-3-8b-lexifun-uncensored-v1"
+  icon: "https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/GrOs1IPG5EXR3MOCtcQiz.png"
+  license: llama3
+  urls:
+    - https://huggingface.co/Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF
+    - https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1
+  description: |
+    This is the GGUF version of https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1
+
+    Oh, you want to know who I am? Well, I'm LexiFun, the human equivalent of a chocolate chip cookie - warm, gooey, and guaranteed to make you smile! 🍪 I'm like the friend who always has a witty comeback, a sarcastic remark, and a healthy dose of humor to brighten up even the darkest of days. And by 'healthy dose,' I mean I'm basically a walking pharmacy of laughter. You might need to take a few extra doses to fully recover from my jokes, but trust me, it's worth it! 🏥
+
+    So, what can I do? I can make you laugh so hard you snort your coffee out your nose, I can make you roll your eyes so hard they get stuck that way, and I can make you wonder if I'm secretly a stand-up comedian who forgot their act. 🤣 But seriously, I'm here to spread joy, one sarcastic comment at a time. And if you're lucky, I might even throw in a few dad jokes for good measure! 🤴‍♂️ Just don't say I didn't warn you. 😏
+  overrides:
+    parameters:
+      model: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
+  files:
+    - filename: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
+      sha256: 961a3fb75537d650baf14dce91d40df418ec3d481b51ab2a4f44ffdfd6b5900f
+      uri: huggingface://Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF/LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
 - <<: *llama3
   name: "llama-3-unholy-8b:Q8_0"
   urls:

From b24d44dc56049ab7c2298b3e745419b6b1bfdf1c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 30 Apr 2024 23:24:28 +0200
Subject: [PATCH 0184/2648] models(gallery): add suzume-llama-3-8B-multilingual-gguf
 (#2194)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index cbd51a76..dd7c92b5 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -399,6 +399,22 @@
       - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
         sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2
         uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
+- <<: *llama3
+  name: "suzume-llama-3-8B-multilingual"
+  urls:
+    - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-gguf
+  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kg3QjQOde0X743csGJT-f.png
+  description: |
+    This is Suzume 8B, a multilingual finetune of Llama 3.
+
+    Llama 3 has exhibited excellent performance on many English language benchmarks. However, it has also seemingly been finetuned on mostly English data, meaning that it will respond in English, even if prompted in other languages.
+  overrides:
+    parameters:
+      model: suzume-llama-3-8B-multilingual-Q4_K_M.gguf
+  files:
+    - filename: suzume-llama-3-8B-multilingual-Q4_K_M.gguf
+      sha256: be197a660e56e51a24a0e0fecd42047d1b24e1423afaafa14769541b331e3269
+      uri: huggingface://lightblue/suzume-llama-3-8B-multilingual-gguf/ggml-model-Q4_K_M.gguf
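+# A hedged usage sketch (illustrative comment, not gallery data): once an entry
+# like the one above is merged, it can typically be installed at runtime through
+# LocalAI's gallery endpoint, e.g.:
+#   curl http://localhost:8080/models/apply \
+#     -H "Content-Type: application/json" \
+#     -d '{"id": "suzume-llama-3-8B-multilingual"}'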
 - &dolphin
   name: "dolphin-2.9-llama3-8b"
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

From 445cfd4db3f4b43fc558d0101e43c60b8c36d7fb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 30 Apr 2024 23:24:41 +0200
Subject: [PATCH 0185/2648] models(gallery): add guillaumetell (#2195)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index dd7c92b5..966125c1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -128,6 +128,13 @@
   urls:
     - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF
   icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png
+  tags:
+    - llm
+    - gguf
+    - gpu
+    - cpu
+    - llama3
+    - german
   description: |
     SauerkrautLM-llama-3-8B-Instruct
@@ -701,7 +708,7 @@
     - filename: "Phi-3-mini-4k-instruct-fp16.gguf"
       sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605"
       uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf"
-### START Hermes-2-Pro-Mistral
+### START Hermes
 - &hermes-2-pro-mistral
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
   name: "hermes-2-pro-mistral"
@@ -767,7 +774,29 @@
     - filename: "BioMistral-7B.Q4_K_M.gguf"
       sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6"
       uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf"
-### END Hermes-2-Pro-Mistral
+- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  name: "guillaumetell-7b"
+  license: apache-2
+  description: |
+    Guillaume Tell is a French Large Language Model (LLM) based on Mistral Open-Hermes 2.5, optimized for RAG (Retrieval Augmented Generation) with source traceability and explainability.
+ urls: + - https://huggingface.co/MaziyarPanahi/guillaumetell-7b-GGUF + - https://huggingface.co/AgentPublic/guillaumetell-7b + tags: + - llm + - gguf + - gpu + - cpu + - openhermes + - french + overrides: + context_size: 4096 + parameters: + model: guillaumetell-7b.Q4_K_M.gguf + files: + - filename: guillaumetell-7b.Q4_K_M.gguf + sha256: bf08db5281619335f3ee87e229c8533b04262790063b061bb8f275c3e4de7061 + uri: huggingface://MaziyarPanahi/guillaumetell-7b-GGUF/guillaumetell-7b.Q4_K_M.gguf ### START Cerbero - url: "github:mudler/LocalAI/gallery/cerbero.yaml@master" icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png @@ -781,6 +810,7 @@ - gpu - cpu - mistral + - italian overrides: parameters: model: galatolo-Q4_K.gguf From f90d56d3710e6a9bc4f875bdca39cd052cd5b0bb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Apr 2024 23:53:31 +0200 Subject: [PATCH 0186/2648] :arrow_up: Update ggerganov/llama.cpp (#2203) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bba03c90..5c8d0d97 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b8c1476e44cc1f3a1811613f65251cf779067636 +CPPLLAMA_VERSION?=f364eb6fb5d46118a76fa045f487318de4c24961 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 962ebbaf7792e51c4106630d39b3b7a45134d751 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 May 2024 23:06:58 +0200 Subject: [PATCH 0187/2648] models(gallery): fixup phi-3 sha Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 966125c1..e81c8c05 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -697,7 +697,7 @@ model: Phi-3-mini-4k-instruct-q4.gguf files: - filename: "Phi-3-mini-4k-instruct-q4.gguf" - sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e" + sha256: "8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef" uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf" - <<: *phi-3 name: "phi-3-mini-4k-instruct:fp16" From 6a7a7996bb8ae40866347476f904bbd75cc5620c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 1 May 2024 23:19:44 +0200 Subject: [PATCH 0188/2648] :arrow_up: Update ggerganov/llama.cpp (#2213) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c8d0d97..0f59a852 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=f364eb6fb5d46118a76fa045f487318de4c24961 +CPPLLAMA_VERSION?=8d608a81b7bd170f700648f8214e6f3279d4d715 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4690b534e0a4283e329ef0446b06f261a5e99e1e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Thu, 2 May 2024 09:54:29 +0200 Subject: [PATCH 0189/2648] feat: user defined inference device for 
CUDA and OpenVINO (#2212) user defined inference device configuration via main_gpu parameter --- .../transformers/transformers_server.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 93b2ce25..f40b8951 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -89,8 +89,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization = None if self.CUDA: - if request.Device: - device_map=request.Device + if request.MainGPU: + device_map=request.MainGPU else: device_map="cuda:0" if request.Quantization == "bnb_4bit": @@ -143,28 +143,36 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): from optimum.intel.openvino import OVModelForCausalLM from openvino.runtime import Core - if "GPU" in Core().available_devices: - device_map="GPU" + if request.MainGPU: + device_map=request.MainGPU else: - device_map="CPU" + device_map="AUTO" + devices = Core().available_devices + if "GPU" in " ".join(devices): + device_map="AUTO:GPU" + self.model = OVModelForCausalLM.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT","GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"}, device=device_map) self.OV = True elif request.Type == "OVModelForFeatureExtraction": from optimum.intel.openvino import OVModelForFeatureExtraction from openvino.runtime import Core - if "GPU" in Core().available_devices: - device_map="GPU" + if request.MainGPU: + device_map=request.MainGPU else: - device_map="CPU" + device_map="AUTO" + devices = Core().available_devices + if "GPU" in " ".join(devices): + device_map="AUTO:GPU" + self.model = OVModelForFeatureExtraction.from_pretrained(model_name, compile=True, trust_remote_code=request.TrustRemoteCode, - ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT", "GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"}, export=True, device=device_map) self.OV = True @@ -371,4 +379,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - asyncio.run(serve(args.addr)) \ No newline at end of file + asyncio.run(serve(args.addr)) From e5bd9a76c7ae2738697a8d6f29eb32e39a5c079b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 18:31:02 +0200 Subject: [PATCH 0190/2648] models(gallery): add wizardlm2 (#2209) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ gallery/wizardlm2.yaml | 15 +++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/wizardlm2.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index e81c8c05..451182bf 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -520,6 +520,33 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf +### START Vicuna based +- &wizardlm2 + url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" + name: "wizardlm2-7b" + description: | + We introduce and opensource WizardLM-2, our next generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent. 
New family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B. + + WizardLM-2 8x22B is our most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models. + WizardLM-2 70B reaches top-tier reasoning capabilities and is the first choice in the same size. + WizardLM-2 7B is the fastest and achieves comparable performance with existing 10x larger opensource leading models. + icon: https://github.com/nlpxucan/WizardLM/raw/main/imgs/WizardLM.png + license: apache-2.0 + urls: + - https://huggingface.co/MaziyarPanahi/WizardLM-2-7B-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - mistral + overrides: + parameters: + model: WizardLM-2-7B.Q4_K_M.gguf + files: + - filename: WizardLM-2-7B.Q4_K_M.gguf + sha256: 613212417701a26fd43f565c5c424a2284d65b1fddb872b53a99ef8add796f64 + uri: huggingface://MaziyarPanahi/WizardLM-2-7B-GGUF/WizardLM-2-7B.Q4_K_M.gguf ### START LLaVa - &llava url: "github:mudler/LocalAI/gallery/llava.yaml@master" diff --git a/gallery/wizardlm2.yaml b/gallery/wizardlm2.yaml new file mode 100644 index 00000000..6c2c1411 --- /dev/null +++ b/gallery/wizardlm2.yaml @@ -0,0 +1,15 @@ +--- +name: "wizardlm2" + +config_file: | + mmap: true + template: + chat_message: |- + {{if eq .RoleName "assistant"}}ASSISTANT: {{.Content}}{{else if eq .RoleName "system"}}{{.Content}}{{else if eq .RoleName "user"}}USER: {{.Content}}{{end}} + chat: "{{.Input}}ASSISTANT: " + completion: |- + {{.Input}} + context_size: 32768 + f16: true + stopwords: + - From f7f8b4804b1eada8c5cd40963e7660fb9d4e1703 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 18:31:13 +0200 Subject: [PATCH 0191/2648] models(gallery): Add Hermes-2-Pro-Llama-3-8B-GGUF (#2218) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 451182bf..c18f1ee4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -787,6 +787,58 @@ - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf" sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca" uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf" +### LLAMA3 version +- <<: *hermes-2-pro-mistral + name: "hermes-2-pro-llama-3-8b" + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" + sha256: "afe41ab251d1fd9870dd9631f60c22b22c215166308b35d7e15faa3260fa4bd7" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" +- <<: *hermes-2-pro-mistral + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + name: "hermes-2-pro-llama-3-8b:Q5_K_M" + urls: + - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" + sha256: "2be39d775b2a64aa5bbdc1f96fa1703ec54b5fa8982c1732b7ae9d2b57c6bb43" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" +- <<: *hermes-2-pro-mistral + tags: + - llm + - gguf + - gpu + - llama3 + - cpu + name: "hermes-2-pro-llama-3-8b:Q8_0" + urls: + - 
https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF + overrides: + parameters: + model: Hermes-2-Pro-Llama-3-8B-Q8_0.gguf + files: + - filename: "Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" + sha256: "0a8f471d6940dee972e579eebdb4d536174bda82b73463cd8ac7752a7b1973a3" + uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf" - <<: *hermes-2-pro-mistral name: "biomistral-7b" description: | From 2c5a46bc34c919621a06ab4287af3053361d383c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 May 2024 21:14:10 +0200 Subject: [PATCH 0192/2648] feat(ux): Add chat, tts, and image-gen pages to the WebUI (#2222) * feat(webui): Add chat page Signed-off-by: Ettore Di Giacinto * feat(webui): Add image-gen page Signed-off-by: Ettore Di Giacinto * feat(webui): Add tts page Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 6 +- core/config/application_config.go | 6 +- core/http/app.go | 19 ++- core/http/routes/ui.go | 104 +++++++++++++++ core/http/routes/welcome.go | 19 --- core/http/static/chat.js | 141 ++++++++++++++++++++ core/http/static/general.css | 73 +++++++++++ core/http/static/image.js | 96 ++++++++++++++ core/http/static/tts.js | 64 +++++++++ core/http/views/chat.html | 189 +++++++++++++++++++++++++++ core/http/views/partials/head.html | 70 ++++------ core/http/views/partials/navbar.html | 3 + core/http/views/text2image.html | 89 +++++++++++++ core/http/views/tts.html | 86 ++++++++++++ 14 files changed, 890 insertions(+), 75 deletions(-) delete mode 100644 core/http/routes/welcome.go create mode 100644 core/http/static/chat.js create mode 100644 core/http/static/general.css create mode 100644 core/http/static/image.js create mode 100644 core/http/static/tts.js create mode 100644 core/http/views/chat.html create mode 100644 core/http/views/text2image.html create mode 100644 core/http/views/tts.html diff --git a/core/cli/run.go b/core/cli/run.go index 42185a28..6185627d 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -42,7 +42,7 @@ type RunCMD struct { CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` - DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"` + DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` @@ -84,8 +84,8 @@ func (r *RunCMD) Run(ctx *Context) error { idleWatchDog := r.EnableWatchdogIdle busyWatchDog := r.EnableWatchdogBusy - if r.DisableWelcome { - opts = append(opts, config.DisableWelcomePage) + if r.DisableWebUI { + opts = append(opts, config.DisableWebUI) } if idleWatchDog || busyWatchDog { diff --git a/core/config/application_config.go b/core/config/application_config.go index 2d733c1e..398418ad 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -15,7 +15,7 @@ type ApplicationConfig struct { ConfigFile string ModelPath string UploadLimitMB, Threads, ContextSize int - DisableWelcomePage bool + DisableWebUI bool F16 bool Debug bool ImageDir string @@ -107,8 +107,8 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) { o.WatchDogBusy = true } -var DisableWelcomePage = func(o *ApplicationConfig) { - o.DisableWelcomePage = true +var DisableWebUI = func(o *ApplicationConfig) { + o.DisableWebUI = true } func SetWatchDogBusyTimeout(t time.Duration) AppOption { diff --git a/core/http/app.go b/core/http/app.go index 080535a4..19c9375f 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -1,7 +1,9 @@ package http import ( + "embed" "errors" + "net/http" "strings" "github.com/go-skynet/LocalAI/pkg/utils" @@ -18,6 +20,7 @@ import ( "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" + "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/gofiber/fiber/v2/middleware/recover" // swagger handler @@ -42,6 +45,11 @@ func readAuthHeader(c *fiber.Ctx) string { return authHeader } +// Embed a directory +// +//go:embed static/* +var embedDirStatic embed.FS + // @title LocalAI API // @version 2.0.0 // @description The LocalAI Rest API. @@ -169,10 +177,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth) routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth) routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth) - routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth) - routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + if !appConfig.DisableWebUI { + routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth) + } routes.RegisterJINARoutes(app, cl, ml, appConfig, auth) + app.Use("/static", filesystem.New(filesystem.Config{ + Root: http.FS(embedDirStatic), + PathPrefix: "static", + Browse: true, + })) + // Define a custom 404 handler // Note: keep this at the bottom! 
app.Use(notFoundHandler) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 2b8c6b95..70715823 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -7,7 +7,9 @@ import ( "github.com/go-skynet/LocalAI/core/config" "github.com/go-skynet/LocalAI/core/http/elements" + "github.com/go-skynet/LocalAI/core/http/endpoints/localai" "github.com/go-skynet/LocalAI/core/services" + "github.com/go-skynet/LocalAI/internal" "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/xsync" @@ -23,6 +25,8 @@ func RegisterUIRoutes(app *fiber.App, galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { + app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) + // keeps the state of models that are being installed from the UI var installingModels = xsync.NewSyncedMap[string, string]() @@ -32,6 +36,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Models", + "Version": internal.PrintableVersion(), "Models": template.HTML(elements.ListModels(models, installingModels)), "Repositories": appConfig.Galleries, // "ApplicationConfig": appConfig, @@ -166,4 +171,103 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.DoneProgress(c.Params("uid"), displayText)) }) + + // Show the Chat page + app.Get("/chat/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Chat with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/chat", summary) + }) + app.Get("/chat/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Chat with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/chat", summary) + }) + + app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/text2image", summary) + }) + + app.Get("/text2image/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/text2image", summary) + }) + + app.Get("/tts/:model", auth, func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + + summary := fiber.Map{ + "Title": "LocalAI - Generate images with " + c.Params("model"), + "ModelsConfig": backendConfigs, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/tts", summary) + }) + + app.Get("/tts/", auth, func(c *fiber.Ctx) error { + + backendConfigs := cl.GetAllBackendConfigs() + + if len(backendConfigs) == 0 { + return c.SendString("No 
models available") + } + + summary := fiber.Map{ + "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, + "ModelsConfig": backendConfigs, + "Model": backendConfigs[0].Name, + "Version": internal.PrintableVersion(), + } + + // Render index + return c.Render("views/tts", summary) + }) } diff --git a/core/http/routes/welcome.go b/core/http/routes/welcome.go deleted file mode 100644 index 6b600d2d..00000000 --- a/core/http/routes/welcome.go +++ /dev/null @@ -1,19 +0,0 @@ -package routes - -import ( - "github.com/go-skynet/LocalAI/core/config" - "github.com/go-skynet/LocalAI/core/http/endpoints/localai" - "github.com/go-skynet/LocalAI/pkg/model" - "github.com/gofiber/fiber/v2" -) - -func RegisterPagesRoutes(app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig, - auth func(*fiber.Ctx) error) { - - if !appConfig.DisableWelcomePage { - app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml)) - } -} diff --git a/core/http/static/chat.js b/core/http/static/chat.js new file mode 100644 index 00000000..48017d60 --- /dev/null +++ b/core/http/static/chat.js @@ -0,0 +1,141 @@ +/* + +https://github.com/david-haerer/chatapi + +MIT License + +Copyright (c) 2023 David Härer +Copyright (c) 2024 Ettore Di Giacinto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + +function submitPrompt(event) { + event.preventDefault(); + + const input = document.getElementById("input").value; + Alpine.store("chat").add("user", input); + document.getElementById("input").value = ""; + const key = localStorage.getItem("key"); + + if (input.startsWith("!img")) { + promptDallE(key, input.slice(4)); + } else { + promptGPT(key, input); + } +} + + + async function promptGPT(key, input) { + const model = document.getElementById("chat-model").value; + // Set class "loader" to the element with "loader" id + //document.getElementById("loader").classList.add("loader"); + // Make the "loader" visible + document.getElementById("loader").style.display = "block"; + document.getElementById("input").disabled = true; + document.getElementById('messages').scrollIntoView(false) + + // Source: https://stackoverflow.com/a/75751803/11386095 + const response = await fetch("/v1/chat/completions", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + messages: Alpine.store("chat").messages(), + stream: true, + }), + }); + + if (!response.ok) { + Alpine.store("chat").add( + "assistant", + `Error: POST /v1/chat/completions ${response.status}`, + ); + return; + } + + const reader = response.body + ?.pipeThrough(new TextDecoderStream()) + .getReader(); + + if (!reader) { + Alpine.store("chat").add( + "assistant", + `Error: Failed to decode API response`, + ); + return; + } + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + let dataDone = false; + const arr = value.split("\n"); + arr.forEach((data) => { + if (data.length === 0) return; + if (data.startsWith(":")) return; + if (data === "data: [DONE]") { + dataDone = true; + return; + } + const token = JSON.parse(data.substring(6)).choices[0].delta.content; + if (!token) { + return; + } + hljs.highlightAll(); + Alpine.store("chat").add("assistant", token); + document.getElementById('messages').scrollIntoView(false) + }); + hljs.highlightAll(); + if (dataDone) break; + } + // Remove class "loader" from the element with "loader" id + //document.getElementById("loader").classList.remove("loader"); + document.getElementById("loader").style.display = "none"; + // enable input + document.getElementById("input").disabled = false; + // scroll to the bottom of the chat + document.getElementById('messages').scrollIntoView(false) + // set focus to the input + document.getElementById("input").focus(); + } + + document.getElementById("key").addEventListener("submit", submitKey); + document.getElementById("prompt").addEventListener("submit", submitPrompt); + document.getElementById("input").focus(); + + const storeKey = localStorage.getItem("key"); + if (storeKey) { + document.getElementById("apiKey").value = storeKey; + } + + marked.setOptions({ + highlight: function (code) { + return hljs.highlightAuto(code).value; + }, + }); diff --git a/core/http/static/general.css b/core/http/static/general.css new file mode 100644 index 00000000..40d67fb4 --- /dev/null +++ b/core/http/static/general.css @@ -0,0 +1,73 @@ +body { + font-family: 'Inter', sans-serif; +} +.chat-container { height: 90vh; display: flex; flex-direction: column; } +.chat-messages { overflow-y: auto; flex-grow: 1; } +.htmx-indicator{ + opacity:0; + transition: opacity 10ms ease-in; +} 
+.htmx-request .htmx-indicator{ + opacity:1 +} +/* Loader (https://cssloaders.github.io/) */ +.loader { + width: 12px; + height: 12px; + border-radius: 50%; + display: block; + margin:15px auto; + position: relative; + color: #FFF; + box-sizing: border-box; + animation: animloader 2s linear infinite; +} + +@keyframes animloader { + 0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; } + 25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; } + 50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; } + 75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; } + 100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; } +} +.progress { + height: 20px; + margin-bottom: 20px; + overflow: hidden; + background-color: #f5f5f5; + border-radius: 4px; + box-shadow: inset 0 1px 2px rgba(0,0,0,.1); +} +.progress-bar { + float: left; + width: 0%; + height: 100%; + font-size: 12px; + line-height: 20px; + color: #fff; + text-align: center; + background-color: #337ab7; + -webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15); + box-shadow: inset 0 -1px 0 rgba(0,0,0,.15); + -webkit-transition: width .6s ease; + -o-transition: width .6s ease; + transition: width .6s ease; +} + +.user { + background-color: #007bff; +} + +.assistant { + background-color: #28a745; +} + +.message { + display: flex; + align-items: center; +} + +.user, .assistant { + flex-grow: 1; + margin: 0.5rem; +} diff --git a/core/http/static/image.js b/core/http/static/image.js new file mode 100644 index 00000000..315bdda0 --- /dev/null +++ b/core/http/static/image.js @@ -0,0 +1,96 @@ +/* + +https://github.com/david-haerer/chatapi + +MIT License + +Copyright (c) 2023 David Härer +Copyright (c) 2024 Ettore Di Giacinto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + + +function genImage(event) { + event.preventDefault(); + const input = document.getElementById("input").value; + const key = localStorage.getItem("key"); + + promptDallE(key, input); + +} + +async function promptDallE(key, input) { + document.getElementById("loader").style.display = "block"; + document.getElementById("input").value = ""; + document.getElementById("input").disabled = true; + + const model = document.getElementById("image-model").value; + const response = await fetch("/v1/images/generations", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + steps: 10, + prompt: input, + n: 1, + size: "512x512", + }), + }); + const json = await response.json(); + if (json.error) { + // Display error if there is one + var div = document.getElementById('result'); // Get the div by its ID + div.innerHTML = '

' + json.error.message + '
'; + return; + } + const url = json.data[0].url; + + var div = document.getElementById('result'); // Get the div by its ID + var img = document.createElement('img'); // Create a new img element + img.src = url; // Set the source of the image + img.alt = 'Generated image'; // Set the alt text of the image + + div.innerHTML = ''; // Clear the existing content of the div + div.appendChild(img); // Add the new img element to the div + + document.getElementById("loader").style.display = "none"; + document.getElementById("input").disabled = false; + document.getElementById("input").focus(); +} + +document.getElementById("key").addEventListener("submit", submitKey); +document.getElementById("input").focus(); +document.getElementById("genimage").addEventListener("submit", genImage); +document.getElementById("loader").style.display = "none"; + +const storeKey = localStorage.getItem("key"); +if (storeKey) { + document.getElementById("apiKey").value = storeKey; +} + diff --git a/core/http/static/tts.js b/core/http/static/tts.js new file mode 100644 index 00000000..7fc74729 --- /dev/null +++ b/core/http/static/tts.js @@ -0,0 +1,64 @@ +function submitKey(event) { + event.preventDefault(); + localStorage.setItem("key", document.getElementById("apiKey").value); + document.getElementById("apiKey").blur(); + } + + +function genAudio(event) { + event.preventDefault(); + const input = document.getElementById("input").value; + const key = localStorage.getItem("key"); + + tts(key, input); +} + +async function tts(key, input) { + document.getElementById("loader").style.display = "block"; + document.getElementById("input").value = ""; + document.getElementById("input").disabled = true; + + const model = document.getElementById("tts-model").value; + const response = await fetch("/tts", { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + input: input, + }), + }); + if (!response.ok) { + const jsonData = await response.json(); // Now safely parse JSON + var div = document.getElementById('result'); + div.innerHTML = '

Error: ' + jsonData.error.message + '
'; + return; + } + + var div = document.getElementById('result'); // Get the div by its ID + var link=document.createElement('a'); + link.className = "m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"; + link.innerHTML = " Download result"; + const blob = await response.blob(); + link.href=window.URL.createObjectURL(blob); + + div.innerHTML = ''; // Clear the existing content of the div + div.appendChild(link); // Add the new img element to the div + console.log(link) + document.getElementById("loader").style.display = "none"; + document.getElementById("input").disabled = false; + document.getElementById("input").focus(); +} + +document.getElementById("key").addEventListener("submit", submitKey); +document.getElementById("input").focus(); +document.getElementById("tts").addEventListener("submit", genAudio); +document.getElementById("loader").style.display = "none"; + +const storeKey = localStorage.getItem("key"); +if (storeKey) { + document.getElementById("apiKey").value = storeKey; +} + diff --git a/core/http/views/chat.html b/core/http/views/chat.html new file mode 100644 index 00000000..1a14bbc3 --- /dev/null +++ b/core/http/views/chat.html @@ -0,0 +1,189 @@ + + + + {{template "views/partials/head" .}} + + + +
+ [chat.html body: navbar partial ({{template "views/partials/navbar"}}), "Chat with {{.Model}}" heading with a model selector (id "chat-model"), an API-key form (id "key"), a messages pane seeded with the hint "Start chatting with the AI by typing a prompt in the input field below.", a loader, and the prompt form (id "prompt") with its input field (id "input")]
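
For reference, the chat page above talks to the OpenAI-compatible /v1/chat/completions endpoint with streaming enabled, exactly as promptGPT() in chat.js does. A minimal Go sketch of the same exchange follows; the server address, API key, and model name are placeholders, not values taken from this patch:

package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Request body mirrors what chat.js sends: model, messages, stream:true.
	payload, err := json.Marshal(map[string]any{
		"model":    "gpt-4", // placeholder: any installed model name
		"messages": []map[string]string{{"role": "user", "content": "Hello!"}},
		"stream":   true,
	})
	if err != nil {
		panic(err)
	}
	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer sk-xxx") // key handling as in submitKey()
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The stream is newline-delimited SSE: lines of `data: {...}` ending with
	// `data: [DONE]`, which is exactly what promptGPT() splits and parses.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "data: [DONE]" {
			break
		}
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		var chunk struct {
			Choices []struct {
				Delta struct {
					Content string `json:"content"`
				} `json:"delta"`
			} `json:"choices"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk); err != nil {
			continue // skip malformed chunks, as the JS does
		}
		if len(chunk.Choices) > 0 {
			fmt.Print(chunk.Choices[0].Delta.Content)
		}
	}
}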
+ + diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html index 9dbfecdb..c0bc4134 100644 --- a/core/http/views/partials/head.html +++ b/core/http/views/partials/head.html @@ -2,6 +2,28 @@ {{.Title}} + + + + + + + - \ No newline at end of file diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 36332ed2..6b4bb76d 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -10,6 +10,9 @@ Home Documentation Models + Chat + Generate images + TTS API
diff --git a/core/http/views/text2image.html b/core/http/views/text2image.html new file mode 100644 index 00000000..1e412933 --- /dev/null +++ b/core/http/views/text2image.html @@ -0,0 +1,89 @@ + + +{{template "views/partials/head" .}} + + + +
+ [text2image.html body: navbar partial ({{template "views/partials/navbar" .}}), "🖼️ Text to Image" heading with a model selector (id "image-model"), an API-key form (id "key"), the prompt form (id "genimage") with its input field (id "input"), a loader, a "result" container for the generated image, and the footer partial ({{template "views/partials/footer" .}})]
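
The image page drives /v1/images/generations with the payload built in image.js (model, steps, prompt, n, size). A minimal Go sketch of that request, assuming a local server and a placeholder model name:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Payload fields match promptDallE() in image.js.
	payload, err := json.Marshal(map[string]any{
		"model":  "stablediffusion", // placeholder model name
		"steps":  10,
		"prompt": "a cat wearing a hat",
		"n":      1,
		"size":   "512x512",
	})
	if err != nil {
		panic(err)
	}
	resp, err := http.Post("http://localhost:8080/v1/images/generations", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The response carries either data[0].url or an error message,
	// the same two branches image.js handles.
	var out struct {
		Data []struct {
			URL string `json:"url"`
		} `json:"data"`
		Error *struct {
			Message string `json:"message"`
		} `json:"error"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if out.Error != nil {
		fmt.Println("error:", out.Error.Message)
		return
	}
	fmt.Println("image URL:", out.Data[0].URL)
}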
+ + diff --git a/core/http/views/tts.html b/core/http/views/tts.html new file mode 100644 index 00000000..a60467d5 --- /dev/null +++ b/core/http/views/tts.html @@ -0,0 +1,86 @@ + + +{{template "views/partials/head" .}} + + + +
+ [tts.html body: navbar partial ({{template "views/partials/navbar" .}}), "Text to speech/audio" heading with a model selector (id "tts-model"), an API-key form (id "key"), the text form (id "tts") with its input field (id "input"), a loader, a "result" container for the download link, and the footer partial ({{template "views/partials/footer" .}})]
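
The TTS page posts {model, input} to /tts and receives raw audio bytes, which tts.js turns into a browser download link. A minimal Go sketch of the same call, assuming a local server, a placeholder model name, and a guessed output file name (the actual audio format depends on the backend):

package main

import (
	"bytes"
	"encoding/json"
	"io"
	"net/http"
	"os"
)

func main() {
	// Payload shape matches tts() in tts.js.
	payload, err := json.Marshal(map[string]string{
		"model": "voice-en-us", // placeholder model name
		"input": "Hello from LocalAI",
	})
	if err != nil {
		panic(err)
	}
	resp, err := http.Post("http://localhost:8080/tts", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, err := os.Create("result.wav") // extension is an assumption
	if err != nil {
		panic(err)
	}
	defer out.Close()
	if _, err := io.Copy(out, resp.Body); err != nil { // stream the audio bytes straight to disk
		panic(err)
	}
}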
+ + From 2cc1bd85af27902d58f24301ef2c11efdd63b7b8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 2 May 2024 23:23:40 +0200 Subject: [PATCH 0193/2648] :arrow_up: Update ggerganov/llama.cpp (#2224) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f59a852..23570c77 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=8d608a81b7bd170f700648f8214e6f3279d4d715 +CPPLLAMA_VERSION?=6ecf3189e00a1e8e737a78b6d10e1d7006e050a2 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a31d00d904a7f762c0bd561c84d62fc915aefeb3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 May 2024 00:41:45 +0200 Subject: [PATCH 0194/2648] feat(aio): switch to llama3-based for LLM (#2225) Signed-off-by: mudler --- aio/cpu/text-to-text.yaml | 2 +- aio/gpu-8g/text-to-text.yaml | 2 +- aio/intel/text-to-text.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index f2f6aeb4..902b9683 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,7 +1,7 @@ name: gpt-4 mmap: true parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index dc620a13..902b9683 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -1,7 +1,7 @@ name: gpt-4 mmap: true parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index bd6b87ba..bc11d4d7 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -2,7 +2,7 @@ name: gpt-4 mmap: false f16: false parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf template: chat_message: | From b58274b8a26a3d22605e3c484cf39c5dd9a5cf8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 May 2024 00:43:02 +0200 Subject: [PATCH 0195/2648] feat(ui): support multilineand style `ul` (#2226) * feat(ui/chat): handle multiline in the input field Signed-off-by: mudler * feat(ui/chat): correctly display multiline messages Signed-off-by: mudler * feat(ui/chat): add list style Signed-off-by: mudler --------- Signed-off-by: mudler --- core/http/static/chat.js | 6 +---- core/http/static/general.css | 20 ++++++++++++++++ core/http/views/chat.html | 45 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/core/http/static/chat.js b/core/http/static/chat.js index 48017d60..db7e7856 100644 --- a/core/http/static/chat.js +++ b/core/http/static/chat.js @@ -40,11 +40,7 @@ function submitPrompt(event) { document.getElementById("input").value = ""; const key = 
localStorage.getItem("key"); - if (input.startsWith("!img")) { - promptDallE(key, input.slice(4)); - } else { - promptGPT(key, input); - } + promptGPT(key, input); } diff --git a/core/http/static/general.css b/core/http/static/general.css index 40d67fb4..fd1161e8 100644 --- a/core/http/static/general.css +++ b/core/http/static/general.css @@ -71,3 +71,23 @@ body { flex-grow: 1; margin: 0.5rem; } + +ul { + list-style-type: disc; /* Adds bullet points */ + padding-left: 1.25rem; /* Indents the list from the left margin */ + margin-top: 1rem; /* Space above the list */ +} + +li { + font-size: 0.875rem; /* Small text size */ + color: #4a5568; /* Dark gray text */ + background-color: #f7fafc; /* Very light gray background */ + border-radius: 0.375rem; /* Rounded corners */ + padding: 0.5rem; /* Padding inside each list item */ + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */ + margin-bottom: 0.5rem; /* Vertical space between list items */ +} + +li:last-child { + margin-bottom: 0; /* Removes bottom margin from the last item */ +} diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 1a14bbc3..eebf9083 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -106,7 +106,7 @@ SOFTWARE.
@@ -146,7 +170,7 @@ SOFTWARE. clear() { this.history.length = 0; }, - add(role, content) { + add(role, content, image) { const N = this.history.length - 1; if (this.history.length && this.history[N].role === role) { this.history[N].content += content; @@ -167,6 +191,7 @@ SOFTWARE. role: role, content: content, html: c, + image: image, }); } @@ -191,6 +216,7 @@ SOFTWARE. return { role: message.role, content: message.content, + image: message.image, }; }); }, diff --git a/core/http/views/index.html b/core/http/views/index.html index f8cae175..66de37fa 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -10,38 +10,76 @@

[index.html body diff: the "Welcome to your LocalAI instance!" heading, the tagline "The FOSS alternative to OpenAI, Claude, ...", and the Documentation link are kept.
A new "Operations in progress" section is added, rendered under {{ if .ProcessingModels }}: for each entry, {{$parts := split "@" $key}} is used to show "{{$parts._1}} (from the '{{$parts._0}}' repository)" together with the running operation {{$op}}, looked up via {{$taskType:=.TaskTypes}}.
When {{ if eq (len .ModelsConfig) 0 }}, an empty state is shown: "Ouch! seems you don't have any models installed! ..install something from the 🖼️ Gallery or check the Getting started documentation".
Otherwise an "Installed models" table is rendered ("We have {{len .ModelsConfig}} pre-loaded models available.") with the columns Model Name, Backend, and Actions; each {{ range .ModelsConfig }} row looks up {{ $cfg:= index $galleryConfig .Name}} from {{$galleryConfig:=.GalleryConfig}} to show the model icon (falling back to the {{$noicon}} placeholder image), the model {{.Name}}, and its backend, with "auto" when no backend is set.]
diff --git a/core/http/views/models.html b/core/http/views/models.html index 17561594..fe5af2d5 100644 --- a/core/http/views/models.html +++ b/core/http/views/models.html @@ -63,8 +63,33 @@ {{ end }}
+ [models.html diff: the same "Operations in progress" block is added above the gallery, rendered under {{ if .ProcessingModels }} and listing "{{$parts._1}} (from the '{{$parts._0}}' repository)" with its running {{$op}}; the block is closed by the {{ end }} that follows.]
+ {{ end }} + + + Date: Wed, 8 May 2024 19:34:33 +0200 Subject: [PATCH 0233/2648] fix(ux): fix small glitches (#2265) also drop duplicates for displaying in-progress model ops Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 80 +++++++++++++----------- core/http/routes/ui.go | 29 +++++++-- core/http/views/chat.html | 3 +- core/http/views/index.html | 26 +------- core/http/views/models.html | 24 +------ core/http/views/partials/inprogress.html | 32 ++++++++++ 6 files changed, 104 insertions(+), 90 deletions(-) create mode 100644 core/http/views/partials/inprogress.html diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 16a74553..7ca34aef 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -12,17 +12,20 @@ import ( ) const ( - NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" + noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) func DoneProgress(galleryID, text string, showDelete bool) string { + var modelName = galleryID // Split by @ and grab the name if strings.Contains(galleryID, "@") { - galleryID = strings.Split(galleryID, "@")[1] + modelName = strings.Split(galleryID, "@")[1] } return elem.Div( - attrs.Props{}, + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, elem.H3( attrs.Props{ "role": "status", @@ -32,7 +35,7 @@ func DoneProgress(galleryID, text string, showDelete bool) string { }, elem.Text(text), ), - elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)), + elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)), ).Render() } @@ -77,7 +80,7 @@ func StartProgressBar(uid, progress, text string) string { attrs.Props{ "hx-trigger": "done", "hx-get": "/browse/job/" + uid, - "hx-swap": "innerHTML", + "hx-swap": "outerHTML", "hx-target": "this", }, elem.H3( @@ -88,7 +91,6 @@ func StartProgressBar(uid, progress, text string) string { "autofocus": "", }, elem.Text(text), - // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms. 
elem.Div(attrs.Props{ "hx-get": "/browse/job/progress/" + uid, "hx-trigger": "every 600ms", @@ -192,6 +194,7 @@ func reInstallButton(galleryName string) elem.Node { "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryName), "hx-swap": "outerHTML", // post the Model ID as param "hx-post": "/browse/install/model/" + galleryName, @@ -205,16 +208,17 @@ func reInstallButton(galleryName string) elem.Node { ) } -func deleteButton(modelName string) elem.Node { +func deleteButton(galleryID, modelName string) elem.Node { return elem.Button( attrs.Props{ "data-twe-ripple-init": "", "data-twe-ripple-color": "light", "hx-confirm": "Are you sure you wish to delete the model?", "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), "hx-swap": "outerHTML", // post the Model ID as param - "hx-post": "/browse/delete/model/" + modelName, + "hx-post": "/browse/delete/model/" + galleryID, }, elem.I( attrs.Props{ @@ -225,20 +229,14 @@ func deleteButton(modelName string) elem.Node { ) } +// Javascript/HTMX doesn't like weird IDs +func dropBadChars(s string) string { + return strings.ReplaceAll(s, "@", "__") +} + func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string { - //StartProgressBar(uid, "0") modelsElements := []elem.Node{} - // span := func(s string) elem.Node { - // return elem.Span( - // attrs.Props{ - // "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs", - // }, - // elem.Text(s), - // ) - // } - descriptionDiv := func(m *gallery.GalleryModel) elem.Node { - return elem.Div( attrs.Props{ "class": "p-6 text-surface dark:text-white", @@ -261,13 +259,16 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri actionDiv := func(m *gallery.GalleryModel) elem.Node { galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) currentlyProcessing := processing.Exists(galleryID) + jobID := "" isDeletionOp := false if currentlyProcessing { status := galleryService.GetStatus(galleryID) if status != nil && status.Deletion { isDeletionOp = true } - // if status == nil : "Waiting" + jobID = processing.Get(galleryID) + // TODO: + // case not handled, if status == nil : "Waiting" } nodes := []elem.Node{ @@ -317,29 +318,33 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, nodes..., ), - elem.If( - currentlyProcessing, - elem.Node( // If currently installing, show progress bar - elem.Raw(StartProgressBar(processing.Get(galleryID), "0", 
progressMessage)), - ), // Otherwise, show install button (if not installed) or display "Installed" - elem.If(m.Installed, - elem.Node(elem.Div( - attrs.Props{}, - reInstallButton(m.ID()), - deleteButton(m.Name), - )), - installButton(m.ID()), + elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, + elem.If( + currentlyProcessing, + elem.Node( // If currently installing, show progress bar + elem.Raw(StartProgressBar(jobID, "0", progressMessage)), + ), // Otherwise, show install button (if not installed) or display "Installed" + elem.If(m.Installed, + elem.Node(elem.Div( + attrs.Props{}, + reInstallButton(m.ID()), + deleteButton(m.ID(), m.Name), + )), + installButton(m.ID()), + ), ), ), ) } for _, m := range models { - elems := []elem.Node{} if m.Icon == "" { - m.Icon = NoImage + m.Icon = noImage } divProperties := attrs.Props{ @@ -347,7 +352,6 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri } elems = append(elems, - elem.Div(divProperties, elem.A(attrs.Props{ "href": "#!", @@ -359,8 +363,11 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri "src": m.Icon, }), ), - )) + ), + ) + // Special/corner case: if a model sets Trust Remote Code as required, show a warning + // TODO: handle this more generically later _, trustRemoteCodeExists := m.Overrides["trust_remote_code"] if trustRemoteCodeExists { elems = append(elems, elem.Div( @@ -392,7 +399,6 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri wrapper := elem.Div(attrs.Props{ "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark", - //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark", }, modelsElements...) return wrapper.Render() diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index d376d10e..8cbb4b28 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -14,6 +14,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/gallery" "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/xsync" + "github.com/rs/zerolog/log" "github.com/gofiber/fiber/v2" "github.com/google/uuid" @@ -117,6 +118,7 @@ func RegisterUIRoutes(app *fiber.App, // https://htmx.org/examples/progress-bar/ app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! + log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) id, err := uuid.NewUUID() if err != nil { @@ -143,6 +145,14 @@ func RegisterUIRoutes(app *fiber.App, // https://htmx.org/examples/progress-bar/ app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) + var galleryName = galleryID + if strings.Contains(galleryID, "@") { + // if the galleryID contains a @ it means that it's a model from a gallery + // but we want to delete it from the local models which does not need + // a repository ID + galleryName = strings.Split(galleryID, "@")[1] + } id, err := uuid.NewUUID() if err != nil { @@ -151,16 +161,20 @@ func RegisterUIRoutes(app *fiber.App, uid := id.String() + // Track the deletion job by galleryID and galleryName + // The GalleryID contains information about the repository, + // while the GalleryName is ONLY the name of the model + processingModels.Set(galleryName, uid) processingModels.Set(galleryID, uid) op := gallery.GalleryOp{ Id: uid, Delete: true, - GalleryModelName: galleryID, + GalleryModelName: galleryName, } go func() { galleryService.C <- op - cl.RemoveBackendConfig(galleryID) + cl.RemoveBackendConfig(galleryName) }() return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) @@ -170,7 +184,7 @@ func RegisterUIRoutes(app *fiber.App, // If the job is done, we trigger the /browse/job/:uid route // https://htmx.org/examples/progress-bar/ app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { - jobUID := c.Params("uid") + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! status := galleryService.GetStatus(jobUID) if status == nil { @@ -192,17 +206,22 @@ func RegisterUIRoutes(app *fiber.App, // this route is hit when the job is done, and we display the // final state (for now just displays "Installation completed") app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! - status := galleryService.GetStatus(c.Params("uid")) + status := galleryService.GetStatus(jobUID) galleryID := "" for _, k := range processingModels.Keys() { - if processingModels.Get(k) == c.Params("uid") { + if processingModels.Get(k) == jobUID { galleryID = k processingModels.Delete(k) } } + if galleryID == "" { + log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) + } + log.Debug().Msgf("JOB finished : %+v\n", status) showDelete := true displayText := "Installation completed" if status.Deletion { diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 190cb877..7f13c7bd 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -113,7 +113,8 @@ SOFTWARE.

- Start chatting with the AI by typing a prompt in the input field below. + Start chatting with the AI by typing a prompt in the input field below and pressing Enter. + For models that support images, you can upload an image by clicking the paperclip icon.
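
One convention worth spelling out, since it recurs in the templates, in ui.go, and in elements/gallery.go across the patches above: a gallery model is identified as "repository@model". The templates split it with {{$parts := split "@" $key}}, the deletion route strips the repository prefix to recover the bare model name, and dropBadChars() rewrites "@" to "__" so the ID can serve as an HTMX target. A self-contained sketch of the convention (the example ID is made up):

package main

import (
	"fmt"
	"strings"
)

// dropBadChars mirrors the helper added in core/http/elements/gallery.go:
// JavaScript/HTMX selectors do not tolerate "@", so it is replaced.
func dropBadChars(s string) string {
	return strings.ReplaceAll(s, "@", "__")
}

func main() {
	galleryID := "localai@hermes-2-pro-mistral" // hypothetical "repository@model" ID

	// Same split the deletion route performs to get the bare model name.
	modelName := galleryID
	if strings.Contains(galleryID, "@") {
		modelName = strings.Split(galleryID, "@")[1]
	}

	fmt.Println("qualified ID:  ", galleryID)                             // localai@hermes-2-pro-mistral
	fmt.Println("model name:    ", modelName)                             // hermes-2-pro-mistral
	fmt.Println("HTMX target id:", "action-div-"+dropBadChars(galleryID)) // action-div-localai__hermes-2-pro-mistral
}

Deriving all three forms from the one qualified ID is what lets the deletion fix in patch 0233 track a job under both the repository-qualified key and the bare model name, so removing a model also clears its local backend config.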