From 482a83886e5cbb738e874ada43afd05f67d761a8 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sun, 21 May 2023 00:40:17 +0200 Subject: [PATCH 001/137] :arrow_up: Update ggerganov/whisper.cpp (#332) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 37a0d8a9..393e7ce3 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 -WHISPER_CPP_VERSION?=bc89f285d8b025867cae87421824f2dea1c8899f +WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 BUILD_TYPE?= From 91bdad1d12fb8e7ae4181803c188f3054634b13e Mon Sep 17 00:00:00 2001 From: mudler Date: Sun, 21 May 2023 00:41:11 +0200 Subject: [PATCH 002/137] docs: fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e71a56de..6b56e4cd 100644 --- a/README.md +++ b/README.md @@ -868,7 +868,7 @@ curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d ' { "uri": "", "sha256": "", - "name": "" + "filename": "" }, "overrides": { "backend": "...", "f16": true } ] From 3c07e11e736bade6607b85b7ddb6c5b507105ec2 Mon Sep 17 00:00:00 2001 From: mudler Date: Sun, 21 May 2023 00:45:24 +0200 Subject: [PATCH 003/137] docs: update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6b56e4cd..43a7c99d 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https ## News +- 21-05-2023: __v1.14.0__ released. 
Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format. - 19-05-2023: __v1.13.0__ released! πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api). - 17-05-2023: __v1.12.0__ released! πŸ”₯πŸ”₯ Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272). - 16-05-2023: πŸ”₯πŸ”₯πŸ”₯ Experimental support for CUDA (https://github.com/go-skynet/LocalAI/pull/258) in the `llama.cpp` backend and Stable diffusion CPU image generation (https://github.com/go-skynet/LocalAI/pull/272) in `master`. 
From ed5df1e68ea251bf92ce49aa19b917eea3a012e0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 May 2023 12:25:24 +0200 Subject: [PATCH 004/137] examples: remove threads from example models (#337) --- examples/chatbot-ui/models/gpt-3.5-turbo.yaml | 1 - examples/langchain/models/gpt-3.5-turbo.yaml | 1 - examples/query_data/models/gpt-3.5-turbo.yaml | 1 - examples/rwkv/models/gpt-3.5-turbo.yaml | 1 - 4 files changed, 4 deletions(-) diff --git a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml b/examples/chatbot-ui/models/gpt-3.5-turbo.yaml index 6df1dbfd..5c192f5d 100644 --- a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml +++ b/examples/chatbot-ui/models/gpt-3.5-turbo.yaml @@ -5,7 +5,6 @@ parameters: temperature: 0.2 top_p: 0.7 context_size: 1024 -threads: 14 stopwords: - "HUMAN:" - "GPT:" diff --git a/examples/langchain/models/gpt-3.5-turbo.yaml b/examples/langchain/models/gpt-3.5-turbo.yaml index 15694125..ea894fb9 100644 --- a/examples/langchain/models/gpt-3.5-turbo.yaml +++ b/examples/langchain/models/gpt-3.5-turbo.yaml @@ -5,7 +5,6 @@ parameters: temperature: 0.2 top_p: 0.7 context_size: 1024 -threads: 4 stopwords: - "HUMAN:" - "GPT:" diff --git a/examples/query_data/models/gpt-3.5-turbo.yaml b/examples/query_data/models/gpt-3.5-turbo.yaml index 6df1dbfd..5c192f5d 100644 --- a/examples/query_data/models/gpt-3.5-turbo.yaml +++ b/examples/query_data/models/gpt-3.5-turbo.yaml @@ -5,7 +5,6 @@ parameters: temperature: 0.2 top_p: 0.7 context_size: 1024 -threads: 14 stopwords: - "HUMAN:" - "GPT:" diff --git a/examples/rwkv/models/gpt-3.5-turbo.yaml b/examples/rwkv/models/gpt-3.5-turbo.yaml index 0193b727..1afce1a3 100644 --- a/examples/rwkv/models/gpt-3.5-turbo.yaml +++ b/examples/rwkv/models/gpt-3.5-turbo.yaml @@ -6,7 +6,6 @@ parameters: max_tokens: 100 top_p: 0.8 context_size: 1024 -threads: 14 backend: "rwkv" cutwords: - "Bob:.*" From 6f54cab3f04a486eeb2be6fd65107c7bfb0a8cc4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 May 2023 14:38:25 
+0200 Subject: [PATCH 005/137] feat: allow to set cors (#339) --- api/api.go | 69 +++++++++++++++++-------------- api/api_test.go | 6 +-- api/openai.go | 60 +++++++++++++-------------- api/options.go | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ main.go | 21 +++++++++- 5 files changed, 199 insertions(+), 65 deletions(-) create mode 100644 api/options.go diff --git a/api/api.go b/api/api.go index b81a89f5..b8d77f20 100644 --- a/api/api.go +++ b/api/api.go @@ -1,10 +1,8 @@ package api import ( - "context" "errors" - model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/logger" @@ -13,16 +11,18 @@ import ( "github.com/rs/zerolog/log" ) -func App(c context.Context, configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App { +func App(opts ...AppOption) *fiber.App { + options := newOptions(opts...) 
+ zerolog.SetGlobalLevel(zerolog.InfoLevel) - if debug { + if options.debug { zerolog.SetGlobalLevel(zerolog.DebugLevel) } // Return errors as JSON responses app := fiber.New(fiber.Config{ - BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB - DisableStartupMessage: disableMessage, + BodyLimit: options.uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + DisableStartupMessage: options.disableMessage, // Override default error handler ErrorHandler: func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -43,24 +43,24 @@ func App(c context.Context, configFile string, loader *model.ModelLoader, upload }, }) - if debug { + if options.debug { app.Use(logger.New(logger.Config{ Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", })) } cm := NewConfigMerger() - if err := cm.LoadConfigs(loader.ModelPath); err != nil { + if err := cm.LoadConfigs(options.loader.ModelPath); err != nil { log.Error().Msgf("error loading config files: %s", err.Error()) } - if configFile != "" { - if err := cm.LoadConfigFile(configFile); err != nil { + if options.configFile != "" { + if err := cm.LoadConfigFile(options.configFile); err != nil { log.Error().Msgf("error loading config file: %s", err.Error()) } } - if debug { + if options.debug { for _, v := range cm.ListConfigs() { cfg, _ := cm.GetConfig(v) log.Debug().Msgf("Model: %s (config: %+v)", v, cfg) @@ -68,46 +68,55 @@ func App(c context.Context, configFile string, loader *model.ModelLoader, upload } // Default middleware config app.Use(recover.New()) - app.Use(cors.New()) + + if options.cors { + if options.corsAllowOrigins == "" { + app.Use(cors.New()) + } else { + app.Use(cors.New(cors.Config{ + AllowOrigins: options.corsAllowOrigins, + })) + } + } // LocalAI API endpoints - applier := newGalleryApplier(loader.ModelPath) - applier.start(c, cm) - app.Post("/models/apply", applyModelGallery(loader.ModelPath, cm, applier.C)) + applier := 
newGalleryApplier(options.loader.ModelPath) + applier.start(options.context, cm) + app.Post("/models/apply", applyModelGallery(options.loader.ModelPath, cm, applier.C)) app.Get("/models/jobs/:uuid", getOpStatus(applier)) // openAI compatible API endpoint // chat - app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) - app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/chat/completions", chatEndpoint(cm, options)) + app.Post("/chat/completions", chatEndpoint(cm, options)) // edit - app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) - app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/edits", editEndpoint(cm, options)) + app.Post("/edits", editEndpoint(cm, options)) // completion - app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) - app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/completions", completionEndpoint(cm, options)) + app.Post("/completions", completionEndpoint(cm, options)) // embeddings - app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) - app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) - app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/embeddings", embeddingsEndpoint(cm, options)) + app.Post("/embeddings", embeddingsEndpoint(cm, options)) + app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, options)) // audio - app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options)) // images - app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir)) + app.Post("/v1/images/generations", imageEndpoint(cm, options)) - if imageDir != "" 
{ - app.Static("/generated-images", imageDir) + if options.imageDir != "" { + app.Static("/generated-images", options.imageDir) } // models - app.Get("/v1/models", listModels(loader, cm)) - app.Get("/models", listModels(loader, cm)) + app.Get("/v1/models", listModels(options.loader, cm)) + app.Get("/models", listModels(options.loader, cm)) return app } diff --git a/api/api_test.go b/api/api_test.go index f061527f..4b245143 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -114,7 +114,7 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(tmpdir) c, cancel = context.WithCancel(context.Background()) - app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "") + app = App(WithContext(c), WithModelLoader(modelLoader)) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -198,7 +198,7 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) c, cancel = context.WithCancel(context.Background()) - app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "") + app = App(WithContext(c), WithModelLoader(modelLoader)) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -316,7 +316,7 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) c, cancel = context.WithCancel(context.Background()) - app = App(c, os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "") + app = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE"))) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") diff --git a/api/openai.go b/api/openai.go index 0a85349c..dffdcbfe 100644 --- a/api/openai.go +++ b/api/openai.go @@ -142,15 +142,15 @@ func defaultRequest(modelFile string) OpenAIRequest { } // https://platform.openai.com/docs/api-reference/completions -func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 
bool) func(c *fiber.Ctx) error { +func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readInput(c, loader, true) + model, input, err := readInput(c, o.loader, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16) + config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -166,7 +166,7 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, var result []Choice for _, i := range config.PromptStrings { // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := loader.TemplatePrefix(templateFile, struct { + templatedInput, err := o.loader.TemplatePrefix(templateFile, struct { Input string }{Input: i}) if err == nil { @@ -174,7 +174,7 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, log.Debug().Msgf("Template found, input modified to: %s", i) } - r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { + r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) { *c = append(*c, Choice{Text: s}) }, nil) if err != nil { @@ -199,14 +199,14 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, } // https://platform.openai.com/docs/api-reference/embeddings -func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { +func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readInput(c, loader, true) + model, input, err := readInput(c, o.loader, true) if err != nil { return fmt.Errorf("failed reading 
parameters from request:%w", err) } - config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16) + config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -216,7 +216,7 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, for i, s := range config.InputToken { // get the model function to call for the result - embedFn, err := ModelEmbedding("", s, loader, *config) + embedFn, err := ModelEmbedding("", s, o.loader, *config) if err != nil { return err } @@ -230,7 +230,7 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, for i, s := range config.InputStrings { // get the model function to call for the result - embedFn, err := ModelEmbedding(s, []int{}, loader, *config) + embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config) if err != nil { return err } @@ -256,7 +256,7 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, } } -func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { +func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) { ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool { @@ -273,12 +273,12 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa close(responses) } return func(c *fiber.Ctx) error { - model, input, err := readInput(c, loader, true) + model, input, err := readInput(c, o.loader, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16) + config, input, err := readConfig(model, input, cm, 
o.loader, o.debug, o.threads, o.ctxSize, o.f16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -319,7 +319,7 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa } // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := loader.TemplatePrefix(templateFile, struct { + templatedInput, err := o.loader.TemplatePrefix(templateFile, struct { Input string }{Input: predInput}) if err == nil { @@ -330,7 +330,7 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa if input.Stream { responses := make(chan OpenAIResponse) - go process(predInput, input, config, loader, responses) + go process(predInput, input, config, o.loader, responses) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -358,7 +358,7 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa return nil } - result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) { + result, err := ComputeChoices(predInput, input, config, o.loader, func(s string, c *[]Choice) { *c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}}) }, nil) if err != nil { @@ -378,14 +378,14 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa } } -func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { +func editEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readInput(c, loader, true) + model, input, err := readInput(c, o.loader, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16) + config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16) if 
err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -401,7 +401,7 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa var result []Choice for _, i := range config.InputStrings { // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := loader.TemplatePrefix(templateFile, struct { + templatedInput, err := o.loader.TemplatePrefix(templateFile, struct { Input string Instruction string }{Input: i}) @@ -410,7 +410,7 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa log.Debug().Msgf("Template found, input modified to: %s", i) } - r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { + r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) { *c = append(*c, Choice{Text: s}) }, nil) if err != nil { @@ -449,9 +449,9 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa * */ -func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error { +func imageEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readInput(c, loader, false) + m, input, err := readInput(c, o.loader, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -461,7 +461,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag } log.Debug().Msgf("Loading model: %+v", m) - config, input, err := readConfig(m, input, cm, loader, debug, 0, 0, false) + config, input, err := readConfig(m, input, cm, o.loader, o.debug, 0, 0, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -518,7 +518,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag tempDir := "" if !b64JSON { - tempDir = imageDir + tempDir = o.imageDir } // Create a 
temporary file outputFile, err := ioutil.TempFile(tempDir, "b64") @@ -535,7 +535,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag baseURL := c.BaseURL() - fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, loader, *config) + fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config) if err != nil { return err } @@ -574,14 +574,14 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag } // https://platform.openai.com/docs/api-reference/audio/create -func transcriptEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { +func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readInput(c, loader, false) + m, input, err := readInput(c, o.loader, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } - config, input, err := readConfig(m, input, cm, loader, debug, threads, ctx, f16) + config, input, err := readConfig(m, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } @@ -616,7 +616,7 @@ func transcriptEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, log.Debug().Msgf("Audio file copied to: %+v", dst) - whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads)) + whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads)) if err != nil { return err } diff --git a/api/options.go b/api/options.go new file mode 100644 index 00000000..f99dda4f --- /dev/null +++ b/api/options.go @@ -0,0 +1,108 @@ +package api + +import ( + "context" + + model 
"github.com/go-skynet/LocalAI/pkg/model" +) + +type Option struct { + context context.Context + configFile string + loader *model.ModelLoader + uploadLimitMB, threads, ctxSize int + f16 bool + debug, disableMessage bool + imageDir string + cors bool + corsAllowOrigins string +} + +type AppOption func(*Option) + +func newOptions(o ...AppOption) *Option { + opt := &Option{ + context: context.Background(), + uploadLimitMB: 15, + threads: 1, + ctxSize: 512, + debug: true, + disableMessage: true, + } + for _, oo := range o { + oo(opt) + } + return opt +} + +func WithCors(b bool) AppOption { + return func(o *Option) { + o.cors = b + } +} + +func WithCorsAllowOrigins(b string) AppOption { + return func(o *Option) { + o.corsAllowOrigins = b + } +} + +func WithContext(ctx context.Context) AppOption { + return func(o *Option) { + o.context = ctx + } +} + +func WithConfigFile(configFile string) AppOption { + return func(o *Option) { + o.configFile = configFile + } +} + +func WithModelLoader(loader *model.ModelLoader) AppOption { + return func(o *Option) { + o.loader = loader + } +} + +func WithUploadLimitMB(limit int) AppOption { + return func(o *Option) { + o.uploadLimitMB = limit + } +} + +func WithThreads(threads int) AppOption { + return func(o *Option) { + o.threads = threads + } +} + +func WithContextSize(ctxSize int) AppOption { + return func(o *Option) { + o.ctxSize = ctxSize + } +} + +func WithF16(f16 bool) AppOption { + return func(o *Option) { + o.f16 = f16 + } +} + +func WithDebug(debug bool) AppOption { + return func(o *Option) { + o.debug = debug + } +} + +func WithDisableMessage(disableMessage bool) AppOption { + return func(o *Option) { + o.disableMessage = disableMessage + } +} + +func WithImageDir(imageDir string) AppOption { + return func(o *Option) { + o.imageDir = imageDir + } +} diff --git a/main.go b/main.go index f3ffc033..c52399e2 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,6 @@ package main import ( - "context" "fmt" "os" "path/filepath" @@ -34,6 
+33,14 @@ func main() { Name: "debug", EnvVars: []string{"DEBUG"}, }, + &cli.BoolFlag{ + Name: "cors", + EnvVars: []string{"CORS"}, + }, + &cli.StringFlag{ + Name: "cors-allow-origins", + EnvVars: []string{"CORS_ALLOW_ORIGINS"}, + }, &cli.IntFlag{ Name: "threads", DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", @@ -94,7 +101,17 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. Copyright: "go-skynet authors", Action: func(ctx *cli.Context) error { fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path")) - return api.App(context.Background(), ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-path")).Listen(ctx.String("address")) + return api.App( + api.WithConfigFile(ctx.String("config-file")), + api.WithModelLoader(model.NewModelLoader(ctx.String("models-path"))), + api.WithContextSize(ctx.Int("context-size")), + api.WithDebug(ctx.Bool("debug")), + api.WithImageDir(ctx.String("image-path")), + api.WithF16(ctx.Bool("f16")), + api.WithCors(ctx.Bool("cors")), + api.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), + api.WithThreads(ctx.Int("threads")), + api.WithUploadLimitMB(ctx.Int("upload-limit"))).Listen(ctx.String("address")) }, } From b36d9f37769dafd79d4feb4f812044b44518d6a3 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 21 May 2023 14:38:52 +0200 Subject: [PATCH 006/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to aba1147 (#333) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index adb9c45a..3fba8362 
100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 20a4b22b..34696973 100644 --- a/go.sum +++ b/go.sum @@ -101,6 +101,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWb github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 h1:7udNpoHYOBktcpCEe8aDaPJ0LyzyRhVjpzAGFjPxPkY= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 4aa78843c0b15764491e3b12be2a8c0b9a1c0aa8 Mon Sep 17 00:00:00 2001 From: Robert Hambrock Date: Sun, 21 May 2023 15:24:04 +0200 Subject: [PATCH 007/137] fix: spec compliant instantiation and termination of streams (#341) --- api/openai.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/api/openai.go b/api/openai.go index dffdcbfe..b97b4e56 100644 --- 
a/api/openai.go +++ b/api/openai.go @@ -259,10 +259,17 @@ func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) { + initialMessage := OpenAIResponse{ + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []Choice{{Delta: &Message{Role: "assistant"}}}, + Object: "chat.completion.chunk", + } + responses <- initialMessage + ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool { resp := OpenAIResponse{ Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}}, + Choices: []Choice{{Delta: &Message{Content: s}}}, Object: "chat.completion.chunk", } log.Debug().Msgf("Sending goroutine: %s", s) @@ -339,13 +346,11 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { enc := json.NewEncoder(&buf) enc.Encode(ev) - fmt.Fprintf(w, "event: data\n\n") - fmt.Fprintf(w, "data: %v\n\n", buf.String()) log.Debug().Msgf("Sending chunk: %s", buf.String()) + fmt.Fprintf(w, "data: %v\n", buf.String()) w.Flush() } - w.WriteString("event: data\n\n") resp := &OpenAIResponse{ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []Choice{{FinishReason: "stop"}}, @@ -353,6 +358,7 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { respData, _ := json.Marshal(resp) w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) + w.WriteString("data: [DONE]\n\n") w.Flush() })) return nil From 9630be56e1254bbb0108dbd67030912b45893014 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 May 2023 15:24:22 +0200 Subject: [PATCH 008/137] fix: make sure ca-certificates is present in the container images (#342) --- Dockerfile | 2 +- Dockerfile.dev | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 27ab3800..a39b3e22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG BUILD_TYPE= FROM golang:$GO_VERSION ENV REBUILD=true WORKDIR /build -RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 +RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 ca-certificates COPY . . 
RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2 RUN make build diff --git a/Dockerfile.dev b/Dockerfile.dev index 50b15944..d688f54b 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -11,5 +11,6 @@ RUN make build FROM debian:$DEBIAN_VERSION COPY --from=builder /build/local-ai /usr/bin/local-ai +RUN apt-get update && apt-get install -y ca-certificates EXPOSE 8080 ENTRYPOINT [ "/usr/bin/local-ai" ] \ No newline at end of file From 2912f9870f2a681a4ec8799577f1b10ac4d0a015 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 22 May 2023 00:13:16 +0200 Subject: [PATCH 009/137] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43a7c99d..dee37a12 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ In a nutshell: LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). -See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. +See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery). ### How does it work? 
From 1cbe6a7067121d0604c241f738931e76f683d2af Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Mon, 22 May 2023 19:02:56 +0200 Subject: [PATCH 010/137] :arrow_up: Update nomic-ai/gpt4all (#345) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 393e7ce3..bf3177ab 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec +GPT4ALL_VERSION?=c8c95ab46f922f7efaa241f14d9c56086aa4ab98 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 From 6b19356740498693ddc4f1a559fb92160f6c65f1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 22 May 2023 19:04:21 +0200 Subject: [PATCH 011/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to c8c95ab (#344) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 3fba8362..e4d7e062 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 34696973..0a59b494 100644 --- a/go.sum +++ b/go.sum @@ -103,6 +103,8 @@ 
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 h1:7udNpoHYOBktcpCEe8aDaPJ0LyzyRhVjpzAGFjPxPkY= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 h1:brOLJSsTLnFK2vUVi7MaVdxAEhHkOsoboR0vR5WW1HU= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 043399dd07d3a9981348fa7663184cb3583a5c9b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 00:06:13 +0200 Subject: [PATCH 012/137] fix: re-enable start API message (#349) Signed-off-by: mudler --- main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main.go b/main.go index c52399e2..b5105fe7 100644 --- a/main.go +++ b/main.go @@ -108,6 +108,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. 
api.WithDebug(ctx.Bool("debug")), api.WithImageDir(ctx.String("image-path")), api.WithF16(ctx.Bool("f16")), + api.WithDisableMessage(false), api.WithCors(ctx.Bool("cors")), api.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), api.WithThreads(ctx.Int("threads")), From 57172e2e3074f4b95434e5324e699e68038ff21a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 00:06:29 +0200 Subject: [PATCH 013/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 5ca8767 (#350) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index e4d7e062..ac5304f6 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 0a59b494..96ead723 100644 --- a/go.sum +++ b/go.sum @@ -105,6 +105,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba114 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 h1:brOLJSsTLnFK2vUVi7MaVdxAEhHkOsoboR0vR5WW1HU= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 
h1:3368tGU1ooRSPw0zMvXqv9wLMxS82LzEkVSuo8DWZBI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 231a3e7c02d1c33c9949939e93a23a8004e8c721 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 00:59:48 +0200 Subject: [PATCH 014/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 2ce2220 (#351) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index ac5304f6..9b65583e 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 96ead723..e09fba34 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95a github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 h1:3368tGU1ooRSPw0zMvXqv9wLMxS82LzEkVSuo8DWZBI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2/go.mod 
h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 9e5cd0f10be7587c6734583b604ba85065106a0b Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 23 May 2023 09:16:56 +0200 Subject: [PATCH 015/137] :arrow_up: Update nomic-ai/gpt4all (#348) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bf3177ab..bd97efd8 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=c8c95ab46f922f7efaa241f14d9c56086aa4ab98 +GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 From f5f8c687bef3ea177f4a40e84d74b9b2680e300e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 10:32:34 +0200 Subject: [PATCH 016/137] examples: add privateGPT example (#355) --- examples/README.md | 8 ++++++++ examples/privateGPT/README.md | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 examples/privateGPT/README.md diff --git a/examples/README.md b/examples/README.md index e5f74e69..44525178 100644 --- a/examples/README.md +++ b/examples/README.md @@ -57,6 +57,14 @@ A full 
example on how to run RWKV models with LocalAI [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) +### PrivateGPT + +_by [@mudler](https://github.com/mudler)_ + +A full example on how to run PrivateGPT with LocalAI + +[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/privateGPT/) + ### Slack bot _by [@mudler](https://github.com/mudler)_ diff --git a/examples/privateGPT/README.md b/examples/privateGPT/README.md new file mode 100644 index 00000000..faf682c4 --- /dev/null +++ b/examples/privateGPT/README.md @@ -0,0 +1,25 @@ +# privateGPT + +This example is a re-adaptation of https://github.com/imartinez/privateGPT to work with LocalAI and OpenAI endpoints. We have a fork with the changes required to work with privateGPT here https://github.com/go-skynet/privateGPT ( PR: https://github.com/imartinez/privateGPT/pull/408 ). + +Follow the instructions in https://github.com/go-skynet/privateGPT: + +```bash +git clone git@github.com:go-skynet/privateGPT.git +cd privateGPT +pip install -r requirements.txt +``` + +Rename `example.env` to `.env` and edit the variables appropriately. 
+ +This is an example `.env` file for LocalAI: + +``` +PERSIST_DIRECTORY=db +# Set to OpenAI here +MODEL_TYPE=OpenAI +EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2 +MODEL_N_CTX=1000 +# LocalAI URL +OPENAI_API_BASE=http://localhost:8080/v1 +``` \ No newline at end of file From 43d3fb3eba9a17df8178cac79dab5033f8f340f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 17:12:48 +0200 Subject: [PATCH 017/137] ci: add binary releases pipelines (#358) --- .github/release.yml | 24 ++++++++ .github/workflows/release.yaml | 81 ++++++++++++++++++++++++++ .github/workflows/release.yml.disabled | 26 --------- .gitignore | 2 + .goreleaser.yaml | 15 ----- Makefile | 8 +++ 6 files changed, 115 insertions(+), 41 deletions(-) create mode 100644 .github/release.yml create mode 100644 .github/workflows/release.yaml delete mode 100644 .github/workflows/release.yml.disabled delete mode 100644 .goreleaser.yaml diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 00000000..c86866c5 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,24 @@ +# .github/release.yml + +changelog: + exclude: + labels: + - ignore-for-release + categories: + - title: Breaking Changes πŸ›  + labels: + - Semver-Major + - breaking-change + - title: "Bug fixes :bug:" + labels: + - bug + - title: Exciting New Features πŸŽ‰ + labels: + - Semver-Minor + - enhancement + - title: πŸ‘’ Dependencies + labels: + - dependencies + - title: Other Changes + labels: + - "*" \ No newline at end of file diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 00000000..7444304d --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,81 @@ +name: Build and Release + +on: push + +jobs: + build-linux: + strategy: + matrix: + include: + - build: 'avx2' + defines: '' + - build: 'avx' + defines: '-DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_AVX512=ON' + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v3 + 
with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential ffmpeg + - name: Build + id: build + env: + CMAKE_ARGS: "${{ matrix.define }}" + BUILD_ID: "${{ matrix.build }}" + run: | + make dist + - uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.build }} + path: release/ + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* + + build-macOS: + strategy: + matrix: + include: + - build: 'avx2' + defines: '' + - build: 'avx' + defines: '-DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_AVX512=ON' + runs-on: macOS-latest + steps: + - name: Clone + uses: actions/checkout@v3 + with: + submodules: true + + - name: Dependencies + run: | + brew update + brew install sdl2 ffmpeg + - name: Build + id: build + env: + CMAKE_ARGS: "${{ matrix.define }}" + BUILD_ID: "${{ matrix.build }}" + run: | + make dist + - uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.build }} + path: release/ + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* \ No newline at end of file diff --git a/.github/workflows/release.yml.disabled b/.github/workflows/release.yml.disabled deleted file mode 100644 index 460a2a45..00000000 --- a/.github/workflows/release.yml.disabled +++ /dev/null @@ -1,26 +0,0 @@ -name: goreleaser - -on: - push: - tags: - - 'v*' - -jobs: - goreleaser: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Set up Go - uses: actions/setup-go@v3 - with: - go-version: 1.18 - - name: Run GoReleaser - uses: goreleaser/goreleaser-action@v4 - with: - version: latest - args: release --clean - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 12c461c4..10f3a35e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,7 @@ 
local-ai models/* test-models/ +release/ + # just in case .DS_Store diff --git a/.goreleaser.yaml b/.goreleaser.yaml deleted file mode 100644 index 6446f903..00000000 --- a/.goreleaser.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Make sure to check the documentation at http://goreleaser.com -project_name: local-ai -builds: - - ldflags: - - -w -s - env: - - CGO_ENABLED=0 - goos: - - linux - - darwin - - windows - goarch: - - amd64 - - arm64 - binary: '{{ .ProjectName }}' \ No newline at end of file diff --git a/Makefile b/Makefile index bd97efd8..4a9ae10b 100644 --- a/Makefile +++ b/Makefile @@ -17,9 +17,12 @@ CGO_LDFLAGS?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc GO_TAGS?= +BUILD_ID?=git OPTIONAL_TARGETS?= +OS := $(shell uname -s) +ARCH := $(shell uname -m) GREEN := $(shell tput -Txterm setaf 2) YELLOW := $(shell tput -Txterm setaf 3) WHITE := $(shell tput -Txterm setaf 7) @@ -186,6 +189,7 @@ clean: ## Remove build related file rm -rf ./bloomz rm -rf ./whisper.cpp rm -rf $(BINARY_NAME) + rm -rf release/ ## Build: @@ -195,6 +199,10 @@ build: prepare ## Build the project $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./ +dist: build + mkdir -p release + cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) + generic-build: ## Build the project using generic BUILD_TYPE="generic" $(MAKE) build From 9decd0813c271b9ef2da92722462718d1d435595 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 21:47:47 +0200 Subject: [PATCH 018/137] feat: update go-gpt2 (#359) Signed-off-by: mudler --- .github/workflows/bump_deps.yaml | 4 +- .github/workflows/release.yaml | 3 + Makefile | 48 +++++----- README.md | 16 ++-- api/prediction.go | 128 +++++++++++++------------- go.mod | 3 +- go.sum | 32 +------ pkg/model/initializers.go | 51 +++++----- 
tests/models_fixtures/embeddings.yaml | 1 - tests/models_fixtures/rwkv.yaml | 1 - 10 files changed, 131 insertions(+), 156 deletions(-) diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 3c0f4202..5e876f15 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -12,8 +12,8 @@ jobs: - repository: "go-skynet/go-llama.cpp" variable: "GOLLAMA_VERSION" branch: "master" - - repository: "go-skynet/go-gpt2.cpp" - variable: "GOGPT2_VERSION" + - repository: "go-skynet/go-ggml-transformers.cpp" + variable: "GOGGMLTRANSFORMERS_VERSION" branch: "master" - repository: "donomii/go-rwkv.cpp" variable: "RWKV_VERSION" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7444304d..ea49088b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -2,6 +2,9 @@ name: Build and Release on: push +permissions: + contents: write + jobs: build-linux: strategy: diff --git a/Makefile b/Makefile index 4a9ae10b..03d0c851 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b -GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827 +GOGGMLTRANSFORMERS_VERSION?=14fd6c9 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd @@ -29,8 +29,8 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) -C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz -LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell 
pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz +C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz +LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz ifeq ($(BUILD_TYPE),openblas) CGO_LDFLAGS+=-lopenblas @@ -117,23 +117,23 @@ gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all $(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a ## CEREBRAS GPT -go-gpt2: - git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2 - cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 +go-ggml-transformers: + git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers + cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 
- @find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + - @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + - @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + - @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} + - @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} + - @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} + - @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} + - @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} + - @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} + - @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} + + @find ./go-ggml-transformers -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + + @find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + + @find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + + @find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} + + @find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} + + @find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} + + @find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} + + @find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} + + @find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} + + @find ./go-ggml-transformers -type f -name "*.cpp" 
-exec sed -i'' -e 's/json_/json_gpt2_/g' {} + -go-gpt2/libgpt2.a: go-gpt2 - $(MAKE) -C go-gpt2 libgpt2.a +go-ggml-transformers/libtransformers.a: go-ggml-transformers + $(MAKE) -C go-ggml-transformers libtransformers.a whisper.cpp: git clone https://github.com/ggerganov/whisper.cpp.git @@ -155,21 +155,21 @@ go-llama/libbinding.a: go-llama replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang - $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2 + $(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert $(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion -prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace +prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace $(GOCMD) mod download ## GENERIC rebuild: ## Rebuilds the project $(MAKE) -C go-llama clean $(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean - $(MAKE) -C go-gpt2 clean + $(MAKE) -C go-ggml-transformers clean $(MAKE) -C go-rwkv clean $(MAKE) -C whisper.cpp clean $(MAKE) -C go-stable-diffusion clean @@ -177,13 +177,13 @@ rebuild: ## Rebuilds the project $(MAKE) -C bloomz clean $(MAKE) build -prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a 
bloomz/libbloomz.a ## Prepares for building +prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building clean: ## Remove build related file rm -fr ./go-llama rm -rf ./gpt4all rm -rf ./go-stable-diffusion - rm -rf ./go-gpt2 + rm -rf ./go-ggml-transformers rm -rf ./go-rwkv rm -rf ./go-bert rm -rf ./bloomz @@ -213,7 +213,7 @@ run: prepare ## run local-ai test-models/testmodel: mkdir test-models mkdir test-dir - wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel + wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav diff --git a/README.md b/README.md index dee37a12..7b9287e1 100644 --- a/README.md +++ b/README.md @@ -129,13 +129,13 @@ Depending on the model you are attempting to run might need more RAM or CPU reso | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | no | no | yes | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | no | no | yes | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | no | no | yes | -| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no | -| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Dolly | yes | no | no | no | -| [redpajama](https://github.com/ggerganov/ggml) 
([binding](https://github.com/go-skynet/go-gpt2.cpp)) | RedPajama | yes | no | no | no | -| [stableLM](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | StableLM GPT/NeoX | yes | no | no | no | -| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Replit | yes | no | no | no | -| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT NeoX | yes | no | no | no | -| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Starcoder | yes | no | no | no | +| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no | +| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | no | no | no | +| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | no | no | no | +| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | no | no | no | +| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | no | no | no | +| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | no | no | no | +| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | no | no | no | | [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | no | no | no | | [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rw)) | rwkv | yes | no | no | yes | | 
[bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp) | bert | no | no | yes | no | @@ -1045,7 +1045,7 @@ MIT - [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) - [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp) -- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp) +- [go-skynet/go-ggml-transformers.cpp](https://github.com/go-skynet/go-ggml-transformers.cpp) - [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp) - [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp) diff --git a/api/prediction.go b/api/prediction.go index c279e08d..08a01e06 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -11,7 +11,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/stablediffusion" "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" - gpt2 "github.com/go-skynet/go-gpt2.cpp" + transformers "github.com/go-skynet/go-ggml-transformers.cpp" llama "github.com/go-skynet/go-llama.cpp" gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" ) @@ -243,23 +243,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback return response, nil } - case *gpt2.GPTNeoX: + case *transformers.GPTNeoX: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, 
gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -267,23 +267,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.Replit: + case *transformers.Replit: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -291,23 +291,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.Starcoder: + case *transformers.Starcoder: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if 
c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -315,23 +315,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.RedPajama: + case *transformers.MPT: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -359,23 +359,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.StableLM: + case *transformers.GPTJ: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = 
append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -383,23 +383,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.Dolly: + case *transformers.Dolly: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( @@ -407,23 +407,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gpt2.GPT2: + case *transformers.GPT2: fn = func() (string, error) { // Generate the prediction using the language model - predictOptions := []gpt2.PredictOption{ - gpt2.SetTemperature(c.Temperature), - gpt2.SetTopP(c.TopP), - gpt2.SetTopK(c.TopK), - gpt2.SetTokens(c.Maxtokens), - gpt2.SetThreads(c.Threads), + predictOptions := []transformers.PredictOption{ + transformers.SetTemperature(c.Temperature), + transformers.SetTopP(c.TopP), + transformers.SetTopK(c.TopK), + transformers.SetTokens(c.Maxtokens), + transformers.SetThreads(c.Threads), } if c.Batch != 0 { - predictOptions = append(predictOptions, 
gpt2.SetBatch(c.Batch)) + predictOptions = append(predictOptions, transformers.SetBatch(c.Batch)) } if c.Seed != 0 { - predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) + predictOptions = append(predictOptions, transformers.SetSeed(c.Seed)) } return model.Predict( diff --git a/go.mod b/go.mod index 9b65583e..fe5afd43 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 - github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523150735-8bfcb3ea6127 github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 @@ -41,6 +41,7 @@ require ( github.com/go-openapi/jsonreference v0.19.6 // indirect github.com/go-openapi/spec v0.20.4 // indirect github.com/go-openapi/swag v0.19.15 // indirect + github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect diff --git a/go.sum b/go.sum index e09fba34..78cc4d1c 100644 --- a/go.sum +++ b/go.sum @@ -16,12 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w= -github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= 
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080 h1:W3itqKpRX9FhheKiAxdmuOBy/mjDfMf2G1vcuFIYqZc= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= @@ -42,21 +36,11 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw= -github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= -github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU= -github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= -github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk= -github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= -github.com/go-skynet/go-llama.cpp 
v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo= -github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= -github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= -github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= +github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s= -github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc= github.com/gofiber/fiber/v2 v2.46.0 h1:wkkWotblsGVlLjXj2dpgKQAYHtXumsK/HyFugQM68Ns= github.com/gofiber/fiber/v2 v2.46.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= @@ -95,20 +79,8 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9 github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs= -github.com/mudler/go-stable-diffusion 
v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 h1:7udNpoHYOBktcpCEe8aDaPJ0LyzyRhVjpzAGFjPxPkY= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 h1:brOLJSsTLnFK2vUVi7MaVdxAEhHkOsoboR0vR5WW1HU= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 h1:3368tGU1ooRSPw0zMvXqv9wLMxS82LzEkVSuo8DWZBI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= diff --git a/pkg/model/initializers.go 
b/pkg/model/initializers.go index b5e43a38..dc593a7c 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -10,7 +10,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/stablediffusion" bloomz "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" - gpt2 "github.com/go-skynet/go-gpt2.cpp" + transformers "github.com/go-skynet/go-ggml-transformers.cpp" llama "github.com/go-skynet/go-llama.cpp" "github.com/hashicorp/go-multierror" gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" @@ -23,9 +23,9 @@ const ( LlamaBackend = "llama" BloomzBackend = "bloomz" StarcoderBackend = "starcoder" - StableLMBackend = "stablelm" + GPTJBackend = "gptj" DollyBackend = "dolly" - RedPajamaBackend = "redpajama" + MPTBackend = "mpt" GPTNeoXBackend = "gptneox" ReplitBackend = "replit" Gpt2Backend = "gpt2" @@ -43,41 +43,41 @@ var backends []string = []string{ Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, - Gpt2Backend, - WhisperBackend, RwkvBackend, - BloomzBackend, - StableLMBackend, - DollyBackend, - RedPajamaBackend, - ReplitBackend, GPTNeoXBackend, + WhisperBackend, BertEmbeddingsBackend, + GPTJBackend, + Gpt2Backend, + DollyBackend, + MPTBackend, + ReplitBackend, StarcoderBackend, + BloomzBackend, } var starCoder = func(modelFile string) (interface{}, error) { - return gpt2.NewStarcoder(modelFile) + return transformers.NewStarcoder(modelFile) } -var redPajama = func(modelFile string) (interface{}, error) { - return gpt2.NewRedPajama(modelFile) +var mpt = func(modelFile string) (interface{}, error) { + return transformers.NewMPT(modelFile) } var dolly = func(modelFile string) (interface{}, error) { - return gpt2.NewDolly(modelFile) + return transformers.NewDolly(modelFile) } var gptNeoX = func(modelFile string) (interface{}, error) { - return gpt2.NewGPTNeoX(modelFile) + return transformers.NewGPTNeoX(modelFile) } var replit = func(modelFile string) (interface{}, error) { - return gpt2.NewReplit(modelFile) + return 
transformers.NewReplit(modelFile) } -var stableLM = func(modelFile string) (interface{}, error) { - return gpt2.NewStableLM(modelFile) +var gptJ = func(modelFile string) (interface{}, error) { + return transformers.NewGPTJ(modelFile) } var bertEmbeddings = func(modelFile string) (interface{}, error) { @@ -87,8 +87,9 @@ var bertEmbeddings = func(modelFile string) (interface{}, error) { var bloomzLM = func(modelFile string) (interface{}, error) { return bloomz.New(modelFile) } -var gpt2LM = func(modelFile string) (interface{}, error) { - return gpt2.New(modelFile) + +var transformersLM = func(modelFile string) (interface{}, error) { + return transformers.New(modelFile) } var stableDiffusion = func(assetDir string) (interface{}, error) { @@ -130,14 +131,14 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla return ml.LoadModel(modelFile, llamaLM(llamaOpts...)) case BloomzBackend: return ml.LoadModel(modelFile, bloomzLM) - case StableLMBackend: - return ml.LoadModel(modelFile, stableLM) + case GPTJBackend: + return ml.LoadModel(modelFile, gptJ) case DollyBackend: return ml.LoadModel(modelFile, dolly) - case RedPajamaBackend: - return ml.LoadModel(modelFile, redPajama) + case MPTBackend: + return ml.LoadModel(modelFile, mpt) case Gpt2Backend: - return ml.LoadModel(modelFile, gpt2LM) + return ml.LoadModel(modelFile, transformersLM) case GPTNeoXBackend: return ml.LoadModel(modelFile, gptNeoX) case ReplitBackend: diff --git a/tests/models_fixtures/embeddings.yaml b/tests/models_fixtures/embeddings.yaml index b90ca75a..46a08502 100644 --- a/tests/models_fixtures/embeddings.yaml +++ b/tests/models_fixtures/embeddings.yaml @@ -1,6 +1,5 @@ name: text-embedding-ada-002 parameters: model: bert -threads: 14 backend: bert-embeddings embeddings: true diff --git a/tests/models_fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml index d78f5cf9..3b47fa0a 100644 --- a/tests/models_fixtures/rwkv.yaml +++ b/tests/models_fixtures/rwkv.yaml @@ -6,7 +6,6 @@ 
parameters: max_tokens: 100 top_p: 0.8 context_size: 1024 -threads: 14 backend: "rwkv" cutwords: - "Bob:.*" From 5807d0b766c7ea8ea858244548c9c8c6f3cb1322 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 22:07:49 +0200 Subject: [PATCH 019/137] docs: update README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b9287e1..4fe922a2 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https ## News +- 23-05-2023: __v1.15.0__ released. `go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. - 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format. - 19-05-2023: __v1.13.0__ released! πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api). - 17-05-2023: __v1.12.0__ released! πŸ”₯πŸ”₯ Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272). 
@@ -129,7 +130,7 @@ Depending on the model you are attempting to run might need more RAM or CPU reso | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | no | no | yes | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | no | no | yes | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | no | no | yes | -| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no | +| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT2, Cerebras | yes | no | no | no | | [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | no | no | no | | [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | no | no | no | | [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | no | no | no | From 891af1c524ad3c5123bf3d228780d62f85d3cda0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 May 2023 22:09:51 +0200 Subject: [PATCH 020/137] docs: update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4fe922a2..7a2d89a2 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https ## News -- 23-05-2023: __v1.15.0__ released. `go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. +- 23-05-2023: __v1.15.0__ released. 
`go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. [Binary releases available](https://github.com/go-skynet/LocalAI/releases), various fixes, including https://github.com/go-skynet/LocalAI/pull/341 . - 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format. - 19-05-2023: __v1.13.0__ released! πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api). - 17-05-2023: __v1.12.0__ released! πŸ”₯πŸ”₯ Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272). 
From c822e18f0d3ca117381f8f85484b9181c414b53b Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 23 May 2023 23:09:48 +0200 Subject: [PATCH 021/137] :arrow_up: Update ggerganov/whisper.cpp (#364) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 03d0c851..f6c44af6 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b GOGGMLTRANSFORMERS_VERSION?=14fd6c9 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 -WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd +WHISPER_CPP_VERSION?=77eab3fbfe5e5462021d92dd230076bba06eefbc BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 BUILD_TYPE?= From e969604d75a06146d397af10620c7f063a049967 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 23 May 2023 23:10:06 +0200 Subject: [PATCH 022/137] :arrow_up: Update go-skynet/go-llama.cpp (#365) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f6c44af6..aa3e02bc 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854 +GOLLAMA_VERSION?=dcf8da632bceab8c41b0e0a2473a922a1867aa2e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b GOGGMLTRANSFORMERS_VERSION?=14fd6c9 From 10e03bde35c5421122c65c59e1ca7adb502a4712 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 23:10:27 +0200 Subject: [PATCH 023/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest 
to 77eab3f (#356) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index fe5afd43..3c39f65d 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 diff --git a/go.sum b/go.sum index 78cc4d1c..de2c9183 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From 3ddea794e1d8dcd5c277d35b603d0b62e8dbb212 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 10:08:36 +0200 Subject: [PATCH 024/137] fix(deps): update 
github.com/go-skynet/go-ggml-transformers.cpp digest to f89d7c2 (#361) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 3c39f65d..1097daa8 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523150735-8bfcb3ea6127 + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index de2c9183..16b9a05c 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w= +github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= @@ -38,8 +40,16 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw= +github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= +github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU= +github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b h1:uKICsAbdRJxMPZ4RXltwOwXPRDO1/d/pdGR3gEEUV9M= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= +github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= +github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -81,8 +91,12 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9 github.com/mattn/go-isatty v0.0.18/go.mod 
h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs= +github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 505572dae84c26c5a0f1a99225cc716bf561c4b5 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 24 May 2023 11:39:56 +0200 Subject: [PATCH 025/137] Add autogpt4all LocalAI usage example to links (#259) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7a2d89a2..ec7bcfc0 100644 --- a/README.md +++ b/README.md @@ -1013,6 +1013,7 @@ Feel free to open up a PR to get your project listed! 
- [Kairos](https://github.com/kairos-io/kairos) - [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models) - [Spark](https://github.com/cedriking/spark) +- [autogpt4all](https://github.com/aorumbayev/autogpt4all) ## Blog posts and other articles From d12c1f7a4a17a6867dff659b36c67b4c392fab31 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 11:40:16 +0200 Subject: [PATCH 026/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to dcf8da6 (#357) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1097daa8..fd5f0c75 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b - github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 + github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 16b9a05c..1686f411 100644 --- a/go.sum +++ b/go.sum @@ -50,6 +50,8 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oar github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce h1:Mcq9LvYG4msXJvFUeiYI6PGftqmYbOoBxNfjyAAyFB4= +github.com/go-skynet/go-llama.cpp 
v0.0.0-20230523103108-dcf8da632bce/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 29583a5ea54d5ef5471e5feb22a41745c45afe60 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 11:40:36 +0200 Subject: [PATCH 027/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to b36a520 (#352) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index fd5f0c75..38dccf9a 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 1686f411..3b03e186 100644 --- a/go.sum +++ b/go.sum @@ -99,6 +99,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWb github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= 
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702 h1:uya1G35AbUfVtG8fu/HuUGTFXpN7n9XuRAAvC1lTr+M= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 04797a80e19c1a8f4a93f1ecbbae1b2108f37dc3 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 12:49:51 +0200 Subject: [PATCH 028/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to c4c581f (#367) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 38dccf9a..3a98f868 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index 3b03e186..338b627f 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,8 @@ github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b h1:uKICsAbdRJxMPZ4RXltwOwXPRDO1/d/pdGR3gEEUV9M= 
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c h1:jXUOCh2K4OzRItTtHzdxvkylE9r1szRSleRpXCNvraY= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From 76c561a90876f24b1dca2224e595d6da0186e734 Mon Sep 17 00:00:00 2001 From: Robert Gracey <70551819+rgracey@users.noreply.github.com> Date: Thu, 25 May 2023 00:27:54 +1000 Subject: [PATCH 029/137] chore: update README to include new Helm values (#369) --- README.md | 76 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index ec7bcfc0..33ad5aee 100644 --- a/README.md +++ b/README.md @@ -608,47 +608,69 @@ It should work, however you need to make sure you give enough resources to the c LocalAI can be installed inside Kubernetes with helm.
+By default, the helm chart will install LocalAI instance using the ggml-gpt4all-j model without persistent storage. 1. Add the helm repo ```bash helm repo add go-skynet https://go-skynet.github.io/helm-charts/ ``` -1. Create a values files with your settings: -```bash -cat < values.yaml +2. Install the helm chart: + ```bash + helm repo update + helm install local-ai go-skynet/local-ai -f values.yaml + ``` +> **Note:** For further configuration options, see the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts). +### Example values +Deploy a single LocalAI pod with 6GB of persistent storage serving up a `ggml-gpt4all-j` model with custom prompt. +```yaml +### values.yaml + deployment: - image: quay.io/go-skynet/local-ai:latest + # Adjust the number of threads and context size for model inference env: - threads: 4 - contextSize: 1024 - modelsPath: "/models" -# Optionally create a PVC, mount the PV to the LocalAI Deployment, -# and download a model to prepopulate the models directory -modelsVolume: - enabled: true - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" - pvc: + threads: 14 + contextSize: 512 + +# Set the pod requests/limits +resources: + limits: + cpu: 4000m + memory: 7000Mi + requests: + cpu: 100m + memory: 6000Mi + +# Add a custom prompt template for the ggml-gpt4all-j model +promptTemplates: + # The name of the model this template belongs to + ggml-gpt4all-j.bin.tmpl: | + This is my custom prompt template... 
+ ### Prompt: + {{.Input}} + ### Response: + +# Model configuration +models: + # Don't re-download models on pod creation + forceDownload: false + + # List of models to download and serve + list: + - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" + # Optional basic HTTP authentication + basicAuth: base64EncodedCredentials + + # Enable 6Gb of persistent storage models and prompt templates + persistence: + enabled: true size: 6Gi - accessModes: - - ReadWriteOnce - auth: - # Optional value for HTTP basic access authentication header - basic: "" # 'username:password' base64 encoded + service: type: ClusterIP annotations: {} # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200" -EOF ``` -3. Install the helm chart: -```bash -helm repo update -helm install local-ai go-skynet/local-ai -f values.yaml -``` - -Check out also the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts). -
## Supported OpenAI API endpoints From c8cc197ddde1b47eb05d8cea21102f5c1895c91c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 May 2023 16:42:24 +0200 Subject: [PATCH 030/137] feat: add static builds (#370) --- Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index aa3e02bc..f63ba9bc 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/ STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc GO_TAGS?= BUILD_ID?=git - +LD_FLAGS=?= OPTIONAL_TARGETS?= OS := $(shell uname -s) @@ -41,6 +41,11 @@ ifeq ($(BUILD_TYPE),cublas) export LLAMA_CUBLAS=1 endif +# glibc-static or glibc-devel-static required +ifeq ($(STATIC),true) + LD_FLAGS=-linkmode external -extldflags -static +endif + ifeq ($(GO_TAGS),stablediffusion) OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a endif @@ -197,7 +202,7 @@ build: prepare ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./ + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./ dist: build mkdir -p release From 589dfae89f488dd7e320a70a9878257b824c4a04 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 21:40:09 +0200 Subject: [PATCH 031/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 5e2b340 (#368) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 3a98f868..155a53d8 100644 --- 
a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 diff --git a/go.sum b/go.sum index 338b627f..07932f1e 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 h1:+STJWsBFikYC90LnR8I9gcBdysQn7Jv9Jb44+5WBi68= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From bf54b7827061829d18edd03266479d7ec93ed4fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 May 2023 22:19:13 +0200 Subject: [PATCH 032/137] feat: add /healthz and /readyz endpoints for kubernetes (#374) --- api/api.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/api.go b/api/api.go index b8d77f20..872d3edd 100644 --- a/api/api.go +++ b/api/api.go @@ -114,6 +114,14 @@ func App(opts ...AppOption) *fiber.App { 
app.Static("/generated-images", options.imageDir) } + ok := func(c *fiber.Ctx) error { + return c.SendStatus(200) + } + + // Kubernetes health checks + app.Get("/healthz", ok) + app.Get("/readyz", ok) + // models app.Get("/v1/models", listModels(options.loader, cm)) app.Get("/models", listModels(options.loader, cm)) From eee41cbe2b66a8edd907de518c8f992ed43b5b7a Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 25 May 2023 00:36:57 +0200 Subject: [PATCH 033/137] :arrow_up: Update go-skynet/go-llama.cpp (#373) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f63ba9bc..801b3aa8 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=dcf8da632bceab8c41b0e0a2473a922a1867aa2e +GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b GOGGMLTRANSFORMERS_VERSION?=14fd6c9 From babbd2374430581bbe10bd24cbf0f9c4693a3d05 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 25 May 2023 00:37:36 +0200 Subject: [PATCH 034/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#363) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 801b3aa8..66b67ca9 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b -GOGGMLTRANSFORMERS_VERSION?=14fd6c9 +GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp 
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 WHISPER_CPP_VERSION?=77eab3fbfe5e5462021d92dd230076bba06eefbc From c22d06c7804bb74d2e1abb64e20c7e95921c6ad7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 13:35:32 +0200 Subject: [PATCH 035/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 6e7e69a (#371) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 155a53d8..2671d2c6 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c - github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce + github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 07932f1e..0448f86a 100644 --- a/go.sum +++ b/go.sum @@ -56,6 +56,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JK github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce h1:Mcq9LvYG4msXJvFUeiYI6PGftqmYbOoBxNfjyAAyFB4= github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e h1:zfxPbHj7/hN2F7V12vfxCi4CFsaVO1WohW96OVFtfNw= +github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 
h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 2a40f4402388dfbc2138e2fd768f0ae55842c4ee Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 May 2023 18:18:02 +0200 Subject: [PATCH 036/137] docs: Add Mods to projects list (#377) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 33ad5aee..a0599010 100644 --- a/README.md +++ b/README.md @@ -1036,6 +1036,7 @@ Feel free to open up a PR to get your project listed! - [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models) - [Spark](https://github.com/cedriking/spark) - [autogpt4all](https://github.com/aorumbayev/autogpt4all) +- [Mods](https://github.com/charmbracelet/mods) ## Blog posts and other articles From 917ff13c86b34e23c01ca836a34df4d02e6e095d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 May 2023 22:43:25 +0200 Subject: [PATCH 037/137] docs: Update README --- README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a0599010..5b8017e4 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,9 @@ Now LocalAI can generate images too: Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) -### Blogs and articles +### Blogs, articles, media +- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE) - CNCF Webinar showcasing LocalAI and k8sgpt. - [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) by Ettore Di Giacinto - [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters. 
by Tyller Gillson @@ -1038,11 +1039,6 @@ Feel free to open up a PR to get your project listed! - [autogpt4all](https://github.com/aorumbayev/autogpt4all) - [Mods](https://github.com/charmbracelet/mods) -## Blog posts and other articles - -- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65 -- https://kairos.io/docs/examples/localai/ - ## Short-term roadmap - [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10) From cd9285bbe6c0d7454855f53ecf908b66fa6ed034 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 22:46:34 +0200 Subject: [PATCH 038/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 63f5763 (#378) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 2671d2c6..ea12b48d 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 0448f86a..43f6375a 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce222 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702 h1:uya1G35AbUfVtG8fu/HuUGTFXpN7n9XuRAAvC1lTr+M= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang 
v0.0.0-20230523222017-b36a52020702/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c h1:mDy1OKHlG9xv1KDMcOVNYQwoYKZSlb5Mu69W3+DNLYI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From e891a467404c6c382edf699c07b287fbc441195c Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 25 May 2023 22:46:44 +0200 Subject: [PATCH 039/137] :arrow_up: Update nomic-ai/gpt4all (#362) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 66b67ca9..1dad999e 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b +GPT4ALL_VERSION?=474c5387f97325fc7c44a515302a63fb4e9a5487 GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 From e350924ac1b0a8c21e7df982e616a5260e82c855 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 26 May 2023 00:00:03 +0200 Subject: [PATCH 040/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to afe3870 (#382) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod 
index ea12b48d..304bfc24 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 43f6375a..12a0f358 100644 --- a/go.sum +++ b/go.sum @@ -109,6 +109,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c h1:mDy1OKHlG9xv1KDMcOVNYQwoYKZSlb5Mu69W3+DNLYI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29 h1:hgml/PMZX3M+WigXD4BGy+mbD1oPxYbXJXo16I555Aw= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 925d7c3057327702a4f8e1675b403a95a5a58bcf Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 26 May 2023 00:43:31 +0200 Subject: [PATCH 041/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 4f18e5e (#381) Co-authored-by: renovate[bot] 
<29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 304bfc24..4962de42 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index 12a0f358..7267d919 100644 --- a/go.sum +++ b/go.sum @@ -50,6 +50,8 @@ github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c h1:jXUOCh2K4OzRItTtHzdxvkylE9r1szRSleRpXCNvraY= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 h1:pb7wUQlgqbakB4vILBq44iLe5w9bcjAsP7js2iFOWX8= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From 8615646827ab8b22a6e6ffc29c92c147e977369d Mon Sep 17 00:00:00 2001 From: 
"renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 26 May 2023 00:43:48 +0200 Subject: [PATCH 042/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to d1ff713 (#383) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 4962de42..4fb6a209 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 7267d919..b19d0bab 100644 --- a/go.sum +++ b/go.sum @@ -113,6 +113,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f576 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29 h1:hgml/PMZX3M+WigXD4BGy+mbD1oPxYbXJXo16I555Aw= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553 h1:+zQQHEoOaVUT72uLr6OJF+Lj35LR620aeeyrF7K6x5s= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= 
github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 320e430c7f26723a0195e6033e6a6b44daa10333 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Fri, 26 May 2023 09:57:03 +0200 Subject: [PATCH 043/137] :arrow_up: Update nomic-ai/gpt4all (#384) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1dad999e..f17c79f4 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=474c5387f97325fc7c44a515302a63fb4e9a5487 +GPT4ALL_VERSION?=810a3b12ccd0b0ee82b55d7ddbcb1db7d345dd06 GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 From a44c8e9b4ece0003e9ef0f5ae4a256d59af1c607 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 May 2023 15:28:26 +0200 Subject: [PATCH 044/137] ci: set flakeAttempts (#386) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f17c79f4..8f9b5a48 100644 --- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ test-models/testmodel: test: prepare test-models/testmodel cp tests/models_fixtures/* test-models - C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api ./pkg + C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo 
--flakeAttempts 5 -v -r ./api ./pkg ## Help: help: ## Show this help. From 62365fa31dfc32625e923aff00f5149c4fe5d1cb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 26 May 2023 18:32:00 +0200 Subject: [PATCH 045/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to a6f3e94 (#387) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 4fb6a209..50cb2c7e 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.15 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index b19d0bab..fbb2816b 100644 --- a/go.sum +++ b/go.sum @@ -115,6 +115,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe387 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553 h1:+zQQHEoOaVUT72uLr6OJF+Lj35LR620aeeyrF7K6x5s= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 h1:DE++nIPuUGk8pz71PF0BITX+CTF0lv4ZNWv12qCBUVk= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 
h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 53c83f2faeb330223bf028504ab524ae1e9cecf5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 May 2023 18:34:02 +0200 Subject: [PATCH 046/137] image: add HEALTHCHECK (#388) Signed-off-by: mudler --- Dockerfile | 6 +++++- Dockerfile.dev | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a39b3e22..1f3830d6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,9 +3,13 @@ ARG BUILD_TYPE= FROM golang:$GO_VERSION ENV REBUILD=true WORKDIR /build -RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 ca-certificates +RUN apt-get update && apt-get install -y cmake curl libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 ca-certificates COPY . . 
RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2 RUN make build +ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz +# Define the health check command +HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ + CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] diff --git a/Dockerfile.dev b/Dockerfile.dev index d688f54b..1e355f1b 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -11,6 +11,10 @@ RUN make build FROM debian:$DEBIAN_VERSION COPY --from=builder /build/local-ai /usr/bin/local-ai -RUN apt-get update && apt-get install -y ca-certificates +RUN apt-get update && apt-get install -y ca-certificates curl +ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz +# Define the health check command +HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ + CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 EXPOSE 8080 ENTRYPOINT [ "/usr/bin/local-ai" ] \ No newline at end of file From 74e808b8c325c252072df276cd6af563dfd98a5b Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Fri, 26 May 2023 22:28:14 +0200 Subject: [PATCH 047/137] :arrow_up: Update nomic-ai/gpt4all (#389) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8f9b5a48..396c1a03 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=810a3b12ccd0b0ee82b55d7ddbcb1db7d345dd06 +GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 From 835a20610b6f43ebfb5d94b6efdb9c2380a476ee Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Fri, 26 May 2023 
22:43:11 +0200 Subject: [PATCH 048/137] :arrow_up: Update ggerganov/whisper.cpp (#372) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 396c1a03..12447b0b 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 -WHISPER_CPP_VERSION?=77eab3fbfe5e5462021d92dd230076bba06eefbc +WHISPER_CPP_VERSION?=5e2b3407ef46eccebe55a64c100401ab37cc0374 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 BUILD_TYPE?= From 76c881043e8e427f0131eb026079e7fe917cc010 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 May 2023 09:26:33 +0200 Subject: [PATCH 049/137] feat: allow to preload models before startup via env var or configs (#391) --- api/api.go | 12 ++++++ api/gallery.go | 113 ++++++++++++++++++++++++++++++++----------------- api/options.go | 13 ++++++ main.go | 12 ++++++ 4 files changed, 112 insertions(+), 38 deletions(-) diff --git a/api/api.go b/api/api.go index 872d3edd..dd5f3022 100644 --- a/api/api.go +++ b/api/api.go @@ -69,6 +69,18 @@ func App(opts ...AppOption) *fiber.App { // Default middleware config app.Use(recover.New()) + if options.preloadJSONModels != "" { + if err := ApplyGalleryFromString(options.loader.ModelPath, options.preloadJSONModels, cm); err != nil { + return nil + } + } + + if options.preloadModelsFromPath != "" { + if err := ApplyGalleryFromFile(options.loader.ModelPath, options.preloadModelsFromPath, cm); err != nil { + return nil + } + } + if options.cors { if options.corsAllowOrigins == "" { app.Use(cors.New()) diff --git a/api/gallery.go b/api/gallery.go index 591b1b7a..b5b74b0d 100644 --- a/api/gallery.go +++ b/api/gallery.go @@ -2,10 +2,12 @@ package 
api import ( "context" + "encoding/json" "fmt" "io/ioutil" "net/http" "net/url" + "os" "strings" "sync" @@ -40,6 +42,43 @@ func newGalleryApplier(modelPath string) *galleryApplier { statuses: make(map[string]*galleryOpStatus), } } + +func applyGallery(modelPath string, req ApplyGalleryModelRequest, cm *ConfigMerger) error { + url, err := req.DecodeURL() + if err != nil { + return err + } + + // Send a GET request to the URL + response, err := http.Get(url) + if err != nil { + return err + } + defer response.Body.Close() + + // Read the response body + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return err + } + + // Unmarshal YAML data into a Config struct + var config gallery.Config + err = yaml.Unmarshal(body, &config) + if err != nil { + return err + } + + config.Files = append(config.Files, req.AdditionalFiles...) + + if err := gallery.Apply(modelPath, req.Name, &config, req.Overrides); err != nil { + return err + } + + // Reload models + return cm.LoadConfigs(modelPath) +} + func (g *galleryApplier) updatestatus(s string, op *galleryOpStatus) { g.Lock() defer g.Unlock() @@ -66,44 +105,7 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) { g.updatestatus(op.id, &galleryOpStatus{Error: e, Processed: true}) } - url, err := op.req.DecodeURL() - if err != nil { - updateError(err) - continue - } - - // Send a GET request to the URL - response, err := http.Get(url) - if err != nil { - updateError(err) - continue - } - defer response.Body.Close() - - // Read the response body - body, err := ioutil.ReadAll(response.Body) - if err != nil { - updateError(err) - continue - } - - // Unmarshal YAML data into a Config struct - var config gallery.Config - err = yaml.Unmarshal(body, &config) - if err != nil { - updateError(fmt.Errorf("failed to unmarshal YAML: %v", err)) - continue - } - - config.Files = append(config.Files, op.req.AdditionalFiles...) 
- - if err := gallery.Apply(g.modelPath, op.req.Name, &config, op.req.Overrides); err != nil { - updateError(err) - continue - } - - // Reload models - if err := cm.LoadConfigs(g.modelPath); err != nil { + if err := applyGallery(g.modelPath, op.req, cm); err != nil { updateError(err) continue } @@ -114,6 +116,41 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) { }() } +func ApplyGalleryFromFile(modelPath, s string, cm *ConfigMerger) error { + dat, err := os.ReadFile(s) + if err != nil { + return err + } + var requests []ApplyGalleryModelRequest + err = json.Unmarshal(dat, &requests) + if err != nil { + return err + } + + for _, r := range requests { + if err := applyGallery(modelPath, r, cm); err != nil { + return err + } + } + + return nil +} +func ApplyGalleryFromString(modelPath, s string, cm *ConfigMerger) error { + var requests []ApplyGalleryModelRequest + err := json.Unmarshal([]byte(s), &requests) + if err != nil { + return err + } + + for _, r := range requests { + if err := applyGallery(modelPath, r, cm); err != nil { + return err + } + } + + return nil +} + // endpoints type ApplyGalleryModelRequest struct { diff --git a/api/options.go b/api/options.go index f99dda4f..ea7497c7 100644 --- a/api/options.go +++ b/api/options.go @@ -15,6 +15,8 @@ type Option struct { debug, disableMessage bool imageDir string cors bool + preloadJSONModels string + preloadModelsFromPath string corsAllowOrigins string } @@ -53,6 +55,17 @@ func WithContext(ctx context.Context) AppOption { } } +func WithYAMLConfigPreload(configFile string) AppOption { + return func(o *Option) { + o.preloadModelsFromPath = configFile + } +} + +func WithJSONStringPreload(configFile string) AppOption { + return func(o *Option) { + o.preloadJSONModels = configFile + } +} func WithConfigFile(configFile string) AppOption { return func(o *Option) { o.configFile = configFile diff --git a/main.go b/main.go index b5105fe7..f391affc 100644 --- a/main.go +++ b/main.go @@ -53,6 +53,16 @@ 
func main() { EnvVars: []string{"MODELS_PATH"}, Value: filepath.Join(path, "models"), }, + &cli.StringFlag{ + Name: "preload-models", + DefaultText: "A List of models to apply in JSON at start", + EnvVars: []string{"PRELOAD_MODELS"}, + }, + &cli.StringFlag{ + Name: "preload-models-config", + DefaultText: "A List of models to apply at startup. Path to a YAML config file", + EnvVars: []string{"PRELOAD_MODELS_CONFIG"}, + }, &cli.StringFlag{ Name: "config-file", DefaultText: "Config file", @@ -103,6 +113,8 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path")) return api.App( api.WithConfigFile(ctx.String("config-file")), + api.WithJSONStringPreload(ctx.String("preload-models")), + api.WithYAMLConfigPreload(ctx.String("preload-models-config")), api.WithModelLoader(model.NewModelLoader(ctx.String("models-path"))), api.WithContextSize(ctx.Int("context-size")), api.WithDebug(ctx.Bool("debug")), From 217dbb448e1d6cee4aea90b464aaf2757191e79e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 May 2023 14:29:11 +0200 Subject: [PATCH 050/137] feat: allow to set a prompt cache path and enable saving state (#395) Signed-off-by: mudler --- Makefile | 2 +- api/config.go | 40 ++++++++++++++++++++++------------------ api/prediction.go | 19 ++++++++++++++++--- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 12447b0b..1aea3651 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2 +GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 diff --git a/api/config.go b/api/config.go index 7e0d8264..42aecbe8 
100644 --- a/api/config.go +++ b/api/config.go @@ -16,24 +16,28 @@ import ( ) type Config struct { - OpenAIRequest `yaml:"parameters"` - Name string `yaml:"name"` - StopWords []string `yaml:"stopwords"` - Cutstrings []string `yaml:"cutstrings"` - TrimSpace []string `yaml:"trimspace"` - ContextSize int `yaml:"context_size"` - F16 bool `yaml:"f16"` - Threads int `yaml:"threads"` - Debug bool `yaml:"debug"` - Roles map[string]string `yaml:"roles"` - Embeddings bool `yaml:"embeddings"` - Backend string `yaml:"backend"` - TemplateConfig TemplateConfig `yaml:"template"` - MirostatETA float64 `yaml:"mirostat_eta"` - MirostatTAU float64 `yaml:"mirostat_tau"` - Mirostat int `yaml:"mirostat"` - NGPULayers int `yaml:"gpu_layers"` - ImageGenerationAssets string `yaml:"asset_dir"` + OpenAIRequest `yaml:"parameters"` + Name string `yaml:"name"` + StopWords []string `yaml:"stopwords"` + Cutstrings []string `yaml:"cutstrings"` + TrimSpace []string `yaml:"trimspace"` + ContextSize int `yaml:"context_size"` + F16 bool `yaml:"f16"` + Threads int `yaml:"threads"` + Debug bool `yaml:"debug"` + Roles map[string]string `yaml:"roles"` + Embeddings bool `yaml:"embeddings"` + Backend string `yaml:"backend"` + TemplateConfig TemplateConfig `yaml:"template"` + MirostatETA float64 `yaml:"mirostat_eta"` + MirostatTAU float64 `yaml:"mirostat_tau"` + Mirostat int `yaml:"mirostat"` + NGPULayers int `yaml:"gpu_layers"` + ImageGenerationAssets string `yaml:"asset_dir"` + + PromptCachePath string `yaml:"prompt_cache_path"` + PromptCacheAll bool `yaml:"prompt_cache_all"` + PromptStrings, InputStrings []string InputToken [][]int } diff --git a/api/prediction.go b/api/prediction.go index 08a01e06..4ae1b69a 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -2,6 +2,8 @@ package api import ( "fmt" + "os" + "path/filepath" "regexp" "strings" "sync" @@ -102,7 +104,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) switch model := inferenceModel.(type) { case 
*llama.LLama: fn = func() ([]float32, error) { - predictOptions := buildLLamaPredictOptions(c) + predictOptions := buildLLamaPredictOptions(c, loader.ModelPath) if len(tokens) > 0 { return model.TokenEmbeddings(tokens, predictOptions...) } @@ -151,7 +153,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) }, nil } -func buildLLamaPredictOptions(c Config) []llama.PredictOption { +func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption { // Generate the prediction using the language model predictOptions := []llama.PredictOption{ llama.SetTemperature(c.Temperature), @@ -161,6 +163,17 @@ func buildLLamaPredictOptions(c Config) []llama.PredictOption { llama.SetThreads(c.Threads), } + if c.PromptCacheAll { + predictOptions = append(predictOptions, llama.EnablePromptCacheAll) + } + + if c.PromptCachePath != "" { + // Create parent directory + p := filepath.Join(modelPath, c.PromptCachePath) + os.MkdirAll(filepath.Dir(p), 0755) + predictOptions = append(predictOptions, llama.SetPathPromptCache(p)) + } + if c.Mirostat != 0 { predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) } @@ -469,7 +482,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback model.SetTokenCallback(tokenCallback) } - predictOptions := buildLLamaPredictOptions(c) + predictOptions := buildLLamaPredictOptions(c, loader.ModelPath) str, er := model.Predict( s, From 59f79532497e1cdd66ba9c32ea624c10f9644ab0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 May 2023 19:42:24 +0200 Subject: [PATCH 051/137] docs(examples): add AutoGPT (#397) --- README.md | 2 +- examples/README.md | 7 +++++ examples/autoGPT/.env | 5 ++++ examples/autoGPT/README.md | 32 +++++++++++++++++++++ examples/autoGPT/docker-compose.yaml | 42 ++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 examples/autoGPT/.env create mode 100644 examples/autoGPT/README.md create mode 
100644 examples/autoGPT/docker-compose.yaml diff --git a/README.md b/README.md index 5b8017e4..60f2ca65 100644 --- a/README.md +++ b/README.md @@ -1025,7 +1025,7 @@ There is the availability of localai-webui and chatbot-ui in the examples sectio
-AutoGPT currently doesn't allow to set a different API URL, but there is a PR open for it, so this should be possible soon! +Yes, see the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)!
diff --git a/examples/README.md b/examples/README.md index 44525178..0229441b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,6 +4,13 @@ Here is a list of projects that can easily be integrated with the LocalAI backen ### Projects +### AutoGPT + +_by [@mudler](https://github.com/mudler)_ + +This example shows how to use AutoGPT with LocalAI. + +[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/autoGPT/) ### Chatbot-UI diff --git a/examples/autoGPT/.env b/examples/autoGPT/.env new file mode 100644 index 00000000..ca0e93e3 --- /dev/null +++ b/examples/autoGPT/.env @@ -0,0 +1,5 @@ +OPENAI_API_KEY=sk---anystringhere +OPENAI_API_BASE=http://api:8080/v1 +# Models to preload at start +# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings +PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}] \ No newline at end of file diff --git a/examples/autoGPT/README.md b/examples/autoGPT/README.md new file mode 100644 index 00000000..f5269a3a --- /dev/null +++ b/examples/autoGPT/README.md @@ -0,0 +1,32 @@ +# AutoGPT + +Example of integration with [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT). + +## Run + +```bash +# Clone LocalAI +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI/examples/autoGPT + +docker-compose run --rm auto-gpt +``` + +Note: The example automatically downloads the `gpt4all` model as it is under a permissive license. The GPT4All model does not seem to be enough to run AutoGPT. WizardLM-7b-uncensored seems to perform better (with `f16: true`). + +See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`. + +## Without docker + +Run AutoGPT with `OPENAI_API_BASE` pointing to the LocalAI endpoint. 
If you run it locally for instance: + +``` +OPENAI_API_BASE=http://localhost:8080 python ... +``` + +Note: you need a model named `gpt-3.5-turbo` and `text-embedding-ada-002`. You can preload those in LocalAI at start by setting in the env: + +``` +PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}] +``` \ No newline at end of file diff --git a/examples/autoGPT/docker-compose.yaml b/examples/autoGPT/docker-compose.yaml new file mode 100644 index 00000000..c3220ad1 --- /dev/null +++ b/examples/autoGPT/docker-compose.yaml @@ -0,0 +1,42 @@ +version: "3.9" +services: + api: + image: quay.io/go-skynet/local-ai:latest + ports: + - 8080:8080 + env_file: + - .env + environment: + - DEBUG=true + - MODELS_PATH=/models + volumes: + - ./models:/models:cached + command: ["/usr/bin/local-ai" ] + auto-gpt: + image: significantgravitas/auto-gpt + depends_on: + api: + condition: service_healthy + redis: + condition: service_started + env_file: + - .env + environment: + MEMORY_BACKEND: ${MEMORY_BACKEND:-redis} + REDIS_HOST: ${REDIS_HOST:-redis} + profiles: ["exclude-from-up"] + volumes: + - ./auto_gpt_workspace:/app/autogpt/auto_gpt_workspace + - ./data:/app/data + ## allow auto-gpt to write logs to disk + - ./logs:/app/logs + ## uncomment following lines if you want to make use of these files + ## you must have them existing in the same folder as this docker-compose.yml + #- type: bind + # source: ./azure.yaml + # target: /app/azure.yaml + #- type: bind + # source: ./ai_settings.yaml + # target: /app/ai_settings.yaml + redis: + image: "redis/redis-stack-server:latest" From aac9a57500725e57ab00ecb201100d36e7cb739d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 27 May 2023 19:42:38 +0200 Subject: [PATCH 052/137] fix(deps): update module github.com/imdario/mergo to v0.3.16 (#394) 
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 50cb2c7e..fd0614e8 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 - github.com/imdario/mergo v0.3.15 + github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 github.com/onsi/ginkgo/v2 v2.9.5 diff --git a/go.sum b/go.sum index fbb2816b..f2f310d3 100644 --- a/go.sum +++ b/go.sum @@ -79,6 +79,8 @@ github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM= github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= From 09641b979063e5774a12125c42dbc5cb2ecf3d33 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 27 May 2023 19:42:51 +0200 Subject: [PATCH 053/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 9b92684 (#392) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 
deletion(-) diff --git a/go.mod b/go.mod index fd0614e8..0231a547 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 diff --git a/go.sum b/go.sum index f2f310d3..8f78f15a 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 h1:+STJWsBFikYC90LnR8I9gcBdysQn7Jv9Jb44+5WBi68= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae h1:uzi5myq/qNX9xiKMRF/fW3HfxuEo2WcnTalwg9fe2hM= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From cdfb930a69b72cde013c765604d3b5c38ed63976 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sat, 27 May 2023 22:30:11 +0200 Subject: [PATCH 054/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#385) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/Makefile b/Makefile index 1aea3651..859f5a07 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b -GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7 +GOGGMLTRANSFORMERS_VERSION?=4f18e5eb75089dc1fc8f1c955bb8f73d18520a46 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 WHISPER_CPP_VERSION?=5e2b3407ef46eccebe55a64c100401ab37cc0374 From 425beea6c561d82fabc9e97c5ffecdefda64038e Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sat, 27 May 2023 22:30:24 +0200 Subject: [PATCH 055/137] :arrow_up: Update ggerganov/whisper.cpp (#398) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 859f5a07..8526872c 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=4f18e5eb75089dc1fc8f1c955bb8f73d18520a46 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 -WHISPER_CPP_VERSION?=5e2b3407ef46eccebe55a64c100401ab37cc0374 +WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 BUILD_TYPE?= From e0d1a8995d376b4810a129c37723fe1a82369c1f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 28 May 2023 08:40:24 +0200 Subject: [PATCH 056/137] fix(deps): update module github.com/sashabaranov/go-openai to v1.9.5 (#400) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0231a547..756931b1 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 github.com/rs/zerolog v1.29.1 - github.com/sashabaranov/go-openai v1.9.4 + github.com/sashabaranov/go-openai v1.9.5 github.com/swaggo/swag v1.16.1 github.com/urfave/cli/v2 v2.25.3 github.com/valyala/fasthttp v1.47.0 diff --git a/go.sum b/go.sum index 8f78f15a..0071e7b8 100644 --- a/go.sum +++ b/go.sum @@ -144,6 +144,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM= github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sashabaranov/go-openai v1.9.5 h1:z1VCMXsfnug+U0ceTTIXr/L26AYl9jafqA9lptlSX0c= +github.com/sashabaranov/go-openai v1.9.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8= github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4= From 65d06285d84b4a3860fbec53f96d155fc2228130 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 May 2023 22:59:25 +0200 Subject: [PATCH 057/137] Bump rwkv (#402) --- Makefile | 8 ++++---- examples/rwkv/scripts/build.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8526872c..994d0f6f 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b 
GOGGMLTRANSFORMERS_VERSION?=4f18e5eb75089dc1fc8f1c955bb8f73d18520a46 -RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp -RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 +RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp +RWKV_VERSION?=dcbd34aff983b3d04fa300c5da5ec4bfdf6db295 WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 @@ -202,7 +202,7 @@ build: prepare ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./ + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ dist: build mkdir -p release @@ -222,7 +222,7 @@ test-models/testmodel: wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav - wget https://huggingface.co/imxcstar/rwkv-4-raven-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096-16_Q4_2.bin -O test-models/rwkv + wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json cp tests/models_fixtures/* test-models diff --git a/examples/rwkv/scripts/build.sh 
b/examples/rwkv/scripts/build.sh index 2ecedbe2..37720582 100755 --- a/examples/rwkv/scripts/build.sh +++ b/examples/rwkv/scripts/build.sh @@ -8,4 +8,4 @@ FILENAME=$(basename $URL) wget -nc $URL -O /build/$FILENAME python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16 -python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2 +python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_0 From 4c0013fd795bdcd011fd229788837130a353b98f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 09:16:32 +0200 Subject: [PATCH 058/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 9eb81cb (#390) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 756931b1..ef2ad877 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922 github.com/onsi/ginkgo/v2 v2.9.5 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 0071e7b8..c1508d28 100644 --- a/go.sum +++ b/go.sum @@ -121,6 +121,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff71 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 h1:DE++nIPuUGk8pz71PF0BITX+CTF0lv4ZNWv12qCBUVk= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang 
v0.0.0-20230526132403-a6f3e94458e2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922 h1:teYhrXxFY28gyBm6QMcYewA0KvLXqkUsgxJcYelaxbg= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= From 728f297bb8c390d0de7b95b1969cbf1ad1864ab4 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 09:16:53 +0200 Subject: [PATCH 059/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to d7c936b (#405) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index ef2ad877..52689e47 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 diff --git a/go.sum b/go.sum index c1508d28..faa717b3 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae 
h1:uzi5myq/qNX9xiKMRF/fW3HfxuEo2WcnTalwg9fe2hM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 h1:IeeVcNaQHdcG+GPg+meOPFvtonvO8p/HBzTrZGjpWZk= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From 821cfed6c00b962979f014531b3e7957939327df Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 14:42:08 +0200 Subject: [PATCH 060/137] fix(deps): update github.com/donomii/go-rwkv.cpp digest to ccb05c3 (#407) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 52689e47..9eefb6a6 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/go-skynet/LocalAI go 1.19 require ( - github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 + github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf diff --git a/go.sum b/go.sum index faa717b3..75d4c9b3 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w= 
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= +github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e h1:YbcLoxAwS0r7otEqU/d8bArubmfEJaG7dZPp0Aa52Io= +github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= From b57ea10c9469851238d8448c3fdcdd1b7d9774eb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 14:42:21 +0200 Subject: [PATCH 061/137] fix(deps): update github.com/go-skynet/go-bert.cpp digest to 771b4a0 (#408) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 9eefb6a6..013aef86 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf - github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 + github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e github.com/gofiber/fiber/v2 v2.46.0 diff --git a/go.sum b/go.sum index 75d4c9b3..6ec36f7f 100644 --- a/go.sum +++ b/go.sum @@ -52,6 +52,8 @@ github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE 
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU= github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= +github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 h1:eiE1CTqanNjpNWF2xp9GvNZXgKgRzNaUSyFZGMLu8Vo= +github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972/go.mod h1:IQrVVZiAuWpneNrahrGu3m7VVaKLDIvQGp+Q6B8jw5g= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b h1:uKICsAbdRJxMPZ4RXltwOwXPRDO1/d/pdGR3gEEUV9M= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c h1:jXUOCh2K4OzRItTtHzdxvkylE9r1szRSleRpXCNvraY= From f5146bde18dae7f1413a05b60d5aa6e933b91cfd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 May 2023 15:17:38 +0200 Subject: [PATCH 062/137] feat: add clblast support (#412) Signed-off-by: mudler --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 994d0f6f..4c5bb0de 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79 +GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=4f18e5eb75089dc1fc8f1c955bb8f73d18520a46 @@ -41,6 +41,10 @@ ifeq ($(BUILD_TYPE),cublas) export LLAMA_CUBLAS=1 endif +ifeq ($(BUILD_TYPE),clblas) + CGO_LDFLAGS+=-lOpenCL -lclblast +endif + # glibc-static or glibc-devel-static required ifeq ($(STATIC),true) LD_FLAGS=-linkmode external 
-extldflags -static @@ -111,6 +115,8 @@ bloomz: @find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} + @find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} + @find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} + + @find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_bloomz_replace/g' {} + + @find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_bloomz_replace/g' {} + bloomz/libbloomz.a: bloomz cd bloomz && make libbloomz.a From 3911957d34f516f00af4346bc563105db8b9b05a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 15:35:33 +0200 Subject: [PATCH 063/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 4bd3910 (#393) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 013aef86..38485b35 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 - github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e + github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 6ec36f7f..0b4616ed 100644 --- a/go.sum +++ b/go.sum @@ -68,6 +68,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce h1:Mcq9LvYG github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e 
h1:zfxPbHj7/hN2F7V12vfxCi4CFsaVO1WohW96OVFtfNw= github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 h1:AbKnkgzkjkyoJtjOHgR3+rmNKOOjmRja6De3HEa7S7E= +github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 18a701355ce74e9d17e6858d3780297704c9cdf2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 16:46:36 +0200 Subject: [PATCH 064/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 695f97b (#410) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 38485b35..c1ea9b08 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index 0b4616ed..cbe69f11 100644 --- a/go.sum +++ b/go.sum @@ -60,6 +60,8 @@ github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c 
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 h1:pb7wUQlgqbakB4vILBq44iLe5w9bcjAsP7js2iFOWX8= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 h1:0VZ5NbrtqvLvBRs0ioXBb9Mp8cOYRqG2WgAIf3+3dlw= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14/go.mod h1:Rz967+t+aY6S+TBiW/WI8FM/C1WEMM+DamSMtKRxVAM= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From 190f01dbe3c4cbc909ebe38072fe510fccfc5f2d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 May 2023 23:09:19 +0200 Subject: [PATCH 065/137] docs: update docs/license(clarification) and point to new website (#415) Signed-off-by: mudler --- LICENSE | 2 +- README.md | 917 ++---------------------------------------------------- 2 files changed, 23 insertions(+), 896 deletions(-) diff --git a/LICENSE b/LICENSE index b9c46f0a..ad671f24 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 go-skynet authors +Copyright (c) 2023 Ettore Di Giacinto Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 60f2ca65..3560f1ca 100644 --- a/README.md +++ b/README.md @@ -9,65 +9,32 @@ 
[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) -**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run models locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format. +**LocalAI** is a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format. Does not require GPU. -For a list of the supported model families, please see [the model compatibility table below](https://github.com/go-skynet/LocalAI#model-compatibility-table). +For a list of the supported model families, please see [the model compatibility table](https://localai.io/model-compatibility/index.html#model-compatibility-table). In a nutshell: - Local, OpenAI drop-in alternative REST API. You own your data. -- NO GPU required. NO Internet access is required either. Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. [See building instructions](https://github.com/go-skynet/LocalAI#cublas). +- NO GPU required. NO Internet access is required either. Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. [See building instructions](https://localai.io/basics/build/index.html). - Supports multiple models, Audio transcription, Text generation with GPTs, Image generation with stable diffusion (experimental) - Once loaded the first time, it keep models loaded in memory for faster inference - Doesn't shell-out, but uses C++ bindings for a faster inference and better performance. -LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! 
It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). +LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! -See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery). +| [ChatGPT OSS alternative](https://github.com/go-skynet/LocalAI/tree/update_docs_2/examples/chatbot-ui) | [Image generation](https://localai.io/api-endpoints/index.html#image-generation) | +|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) | ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) | -### How does it work? -
- -LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU. - -LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding and [StableDiffusion-NCN](https://github.com/EdVince/Stable-Diffusion-NCNN) for image generation. See [the model compatibility table](https://github.com/go-skynet/LocalAI#model-compatibility-table) to learn about all the components of LocalAI. - -![LocalAI](https://github.com/go-skynet/LocalAI/assets/2420543/38de3a9b-3866-48cd-9234-662f9571064a) - -
+See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery). ## News -- 23-05-2023: __v1.15.0__ released. `go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. [Binary releases available](https://github.com/go-skynet/LocalAI/releases), various fixes, including https://github.com/go-skynet/LocalAI/pull/341 . -- 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format. -- 19-05-2023: __v1.13.0__ released! πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api). -- 17-05-2023: __v1.12.0__ released! πŸ”₯πŸ”₯ Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272). -- 16-05-2023: πŸ”₯πŸ”₯πŸ”₯ Experimental support for CUDA (https://github.com/go-skynet/LocalAI/pull/258) in the `llama.cpp` backend and Stable diffusion CPU image generation (https://github.com/go-skynet/LocalAI/pull/272) in `master`. +- 29-05-2023: LocalAI now has a website, [https://localai.io](https://localai.io)! 
check the news in the [dedicated section](https://localai.io/basics/news/index.html)! -Now LocalAI can generate images too: - -| mode=0 | mode=1 (winograd/sgemm) | -|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| -| ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) | ![winograd2](https://github.com/go-skynet/LocalAI/assets/2420543/1935a69a-ecce-4afc-a099-1ac28cb649b3) | - -- 14-05-2023: __v1.11.1__ released! `rwkv` backend patch release -- 13-05-2023: __v1.11.0__ released! πŸ”₯ Updated `llama.cpp` bindings: This update includes a breaking change in the model files ( https://github.com/ggerganov/llama.cpp/pull/1405 ) - old models should still work with the `gpt4all-llama` backend. -- 12-05-2023: __v1.10.0__ released! πŸ”₯πŸ”₯ Updated `gpt4all` bindings. Added support for GPTNeox (experimental), RedPajama (experimental), Starcoder (experimental), Replit (experimental), MosaicML MPT. Also now `embeddings` endpoint supports tokens arrays. See the [langchain-chroma](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma) example! Note - this update does NOT include https://github.com/ggerganov/llama.cpp/pull/1405 which makes models incompatible. -- 11-05-2023: __v1.9.0__ released! πŸ”₯ Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 ) -- 10-05-2023: __v1.8.0__ released! 
πŸ”₯ Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 ) -- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 ) -- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 ) -- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint -- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 ) - -Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) - -### Blogs, articles, media - -- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE) - CNCF Webinar showcasing LocalAI and k8sgpt. -- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) by Ettore Di Giacinto -- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters. by Tyller Gillson +For latest news, follow also on Twitter [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) ## Contribute and help @@ -81,75 +48,11 @@ To help the project you can: - If you don't have technological skills you can still help improving documentation or add examples or share your user-stories with our community, any help and contribution is welcome! -## Model compatibility - -It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) supports also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml). 
- -Tested with: -- Vicuna -- Alpaca -- [GPT4ALL](https://gpt4all.io) -- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required) -- Koala -- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml) -- WizardLM -- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) - -Note: You might need to convert some models from older models to the new format, for indications, see [the README in llama.cpp](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`. - -### RWKV - -
- -A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv). - -Note: rwkv models needs to specify the backend `rwkv` in the YAML config files and have an associated tokenizer along that needs to be provided with it: - -``` -36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small -36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json -``` - -
- -### Others - -It should also be compatible with StableLM and GPTNeoX ggml models (untested). - -### Hardware requirements - -Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources. - - -### Model compatibility table - -
- -| Backend and Bindings | Compatible models | Completion/Chat endpoint | Audio transcription/Image | Embeddings support | Token stream support | -|----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------| -| [llama](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp)) | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | -| [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | no | no | yes | -| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | no | no | yes | -| [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | no | no | yes | -| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT2, Cerebras | yes | no | no | no | -| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | no | no | no | -| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | no | no | no | -| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | no | no | no | -| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | no | no | no | -| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | no | no | no | -| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | no | no | no | -| [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | 
yes | no | no | no | -| [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rw)) | rwkv | yes | no | no | yes | -| [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp) | bert | no | no | yes | no | -| [whisper](https://github.com/ggerganov/whisper.cpp) | whisper | no | Audio | no | no | -| [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | -
- ## Usage -> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). +Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section. Here below you will find generic, quick instructions to get ready and use LocalAI. -The easiest way to run LocalAI is by using `docker-compose` (to build locally, see [building LocalAI](https://github.com/go-skynet/LocalAI/tree/master#setup)): +The easiest way to run LocalAI is by using `docker-compose` (to build locally, see [building LocalAI](https://localai.io/basics/build/index.html)): ```bash @@ -222,277 +125,6 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso ``` -### Advanced: prepare models using the API - -Instead of installing models manually, you can use the LocalAI API endpoints and a model definition to install programmatically via API models in runtime. - -
- -A curated collection of model files is in the [model-gallery](https://github.com/go-skynet/model-gallery) (work in progress!). - -To install for example `gpt4all-j`, you can send a POST call to the `/models/apply` endpoint with the model definition url (`url`) and the name of the model should have in LocalAI (`name`, optional): - -``` -curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{ - "url": "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", - "name": "gpt4all-j" - }' -``` - -
- - -### Other examples - -![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) - -To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/). - - -### Advanced configuration - -LocalAI can be configured to serve user-defined models with a set of default parameters and templates. - -
- -You can create multiple `yaml` files in the models path or either specify a single YAML configuration file. -Consider the following `models` folder in the `example/chatbot-ui`: - -``` -base ❯ ls -liah examples/chatbot-ui/models -36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 . -36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 .. -36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl -36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j -36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml -36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl -``` - -In the `gpt-3.5-turbo.yaml` file it is defined the `gpt-3.5-turbo` model which is an alias to use `gpt4all-j` with pre-defined options. - -For instance, consider the following that declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model: - -```yaml -name: gpt-3.5-turbo -# Default model parameters -parameters: - # Relative to the models path - model: ggml-gpt4all-j - # temperature - temperature: 0.3 - # all the OpenAI request options here.. - -# Default context size -context_size: 512 -threads: 10 -# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with. -backend: gptj # available: llama, stablelm, gpt2, gptj rwkv -# stopwords (if supported by the backend) -stopwords: -- "HUMAN:" -- "### Response:" -# define chat roles -roles: - user: "HUMAN:" - system: "GPT:" -template: - # template file ".tmpl" with the prompt template to use by default on the endpoint call. 
Note there is no extension in the files - completion: completion - chat: ggml-gpt4all-j -``` - -Specifying a `config-file` via CLI allows to declare models in a single file as a list, for instance: - -```yaml -- name: list1 - parameters: - model: testmodel - context_size: 512 - threads: 10 - stopwords: - - "HUMAN:" - - "### Response:" - roles: - user: "HUMAN:" - system: "GPT:" - template: - completion: completion - chat: ggml-gpt4all-j -- name: list2 - parameters: - model: testmodel - context_size: 512 - threads: 10 - stopwords: - - "HUMAN:" - - "### Response:" - roles: - user: "HUMAN:" - system: "GPT:" - template: - completion: completion - chat: ggml-gpt4all-j -``` - -See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files. - -### Full config model file reference - -```yaml -name: gpt-3.5-turbo - -# Default model parameters -parameters: - # Relative to the models path - model: ggml-gpt4all-j - # temperature - temperature: 0.3 - # all the OpenAI request options here.. - top_k: - top_p: - max_tokens: - batch: - f16: true - ignore_eos: true - n_keep: 10 - seed: - mode: - step: - -# Default context size -context_size: 512 -# Default number of threads -threads: 10 -# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with. -backend: gptj # available: llama, stablelm, gpt2, gptj rwkv -# stopwords (if supported by the backend) -stopwords: -- "HUMAN:" -- "### Response:" -# string to trim space to -trimspace: -- string -# Strings to cut from the response -cutstrings: -- "string" -# define chat roles -roles: - user: "HUMAN:" - system: "GPT:" - assistant: "ASSISTANT:" -template: - # template file ".tmpl" with the prompt template to use by default on the endpoint call. 
Note there is no extension in the files - completion: completion - chat: ggml-gpt4all-j - edit: edit_template - -# Enable F16 if backend supports it -f16: true -# Enable debugging -debug: true -# Enable embeddings -embeddings: true -# Mirostat configuration (llama.cpp only) -mirostat_eta: 0.8 -mirostat_tau: 0.9 -mirostat: 1 - -# GPU Layers (only used when built with cublas) -gpu_layers: 22 - -# Directory used to store additional assets (used for stablediffusion) -asset_dir: "" -``` -
- -### Prompt templates - -The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release. - -
-You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca: - -``` -The below instruction describes a task. Write a response that appropriately completes the request. - -### Instruction: -{{.Input}} - -### Response: -``` - -See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models. - - -For the edit endpoint, an example template for alpaca-based models can be: - -```yaml -Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. - -### Instruction: -{{.Instruction}} - -### Input: -{{.Input}} - -### Response: -``` - -
- -### CLI - -You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. - -
- -Usage: - -``` -local-ai --models-path [--address
] [--threads ] -``` - -| Parameter | Environment Variable | Default Value | Description | -| ------------ | -------------------- | ------------- | -------------------------------------- | -| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). | -| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. | -| address | ADDRESS | :8080 | The address and port to listen on. | -| context-size | CONTEXT_SIZE | 512 | Default token context size. | -| debug | DEBUG | false | Enable debug mode. | -| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. | -| upload_limit | UPLOAD_LIMIT | 5MB | Upload limit for whisper. | -| image-path | IMAGE_PATH | empty | Image directory to store and serve processed images. | - -
- -## Setup - -Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). - -### Docker - -
-Example of starting the API with `docker`: - -```bash -docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4 -``` - -You should see: -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Fiber v2.42.0 β”‚ -β”‚ http://127.0.0.1:8080 β”‚ -β”‚ (bound on host 0.0.0.0 and port 8080) β”‚ -β”‚ β”‚ -β”‚ Handlers ............. 1 Processes ........... 1 β”‚ -β”‚ Prefork ....... Disabled PID ................. 1 β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -Note: the binary inside the image is rebuild at the start of the container to enable CPU optimizations for the execution environment, you can set the environment variable `REBUILD` to `false` to prevent this behavior. - -
### Build locally @@ -502,8 +134,8 @@ In order to build the `LocalAI` container image locally you can use `docker`: ``` # build the image -docker build -t LocalAI . -docker run LocalAI +docker build -t localai . +docker run localai ``` Or you can build the binary with `make`: @@ -514,520 +146,19 @@ make build -### Build on mac - -Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`. - -
- -The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server: - -``` -# install build dependencies -brew install cmake -brew install go - -# clone the repo -git clone https://github.com/go-skynet/LocalAI.git - -cd LocalAI - -# build the binary -make build - -# Download gpt4all-j to models/ -wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j - -# Use a template from the examples -cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ - -# Run LocalAI -./local-ai --models-path ./models/ --debug - -# Now API is accessible at localhost:8080 -curl http://localhost:8080/v1/models - -curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "ggml-gpt4all-j", - "messages": [{"role": "user", "content": "How are you?"}], - "temperature": 0.9 - }' -``` - -
- -### Build with Image generation support - -
- -**Requirements**: OpenCV, Gomp - -Image generation is experimental and requires `GO_TAGS=stablediffusion` to be set during build: - -``` -make GO_TAGS=stablediffusion rebuild -``` - -
- -### Acceleration - -#### OpenBLAS - -
- -Requirements: OpenBLAS - -``` -make BUILD_TYPE=openblas build -``` - -
- -#### CuBLAS - -
- -Requirement: Nvidia CUDA toolkit - -Note: CuBLAS support is experimental, and has not been tested on real HW. Please report any issues you find! - -``` -make BUILD_TYPE=cublas build -``` - -More information is available in the upstream PR: https://github.com/ggerganov/llama.cpp/pull/1412 - -
- -### Windows compatibility - -It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2 +See the [build section](https://localai.io/basics/build/index.html) in our documentation for detailed instructions. ### Run LocalAI in Kubernetes -LocalAI can be installed inside Kubernetes with helm. +LocalAI can be installed inside Kubernetes with helm. See [installation instructions](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes). -
-By default, the helm chart will install LocalAI instance using the ggml-gpt4all-j model without persistent storage. +## Supported API endpoints -1. Add the helm repo - ```bash - helm repo add go-skynet https://go-skynet.github.io/helm-charts/ - ``` -2. Install the helm chart: - ```bash - helm repo update - helm install local-ai go-skynet/local-ai -f values.yaml - ``` -> **Note:** For further configuration options, see the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts). -### Example values -Deploy a single LocalAI pod with 6GB of persistent storage serving up a `ggml-gpt4all-j` model with custom prompt. -```yaml -### values.yaml - -deployment: - # Adjust the number of threads and context size for model inference - env: - threads: 14 - contextSize: 512 - -# Set the pod requests/limits -resources: - limits: - cpu: 4000m - memory: 7000Mi - requests: - cpu: 100m - memory: 6000Mi - -# Add a custom prompt template for the ggml-gpt4all-j model -promptTemplates: - # The name of the model this template belongs to - ggml-gpt4all-j.bin.tmpl: | - This is my custom prompt template... - ### Prompt: - {{.Input}} - ### Response: - -# Model configuration -models: - # Don't re-download models on pod creation - forceDownload: false - - # List of models to download and serve - list: - - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" - # Optional basic HTTP authentication - basicAuth: base64EncodedCredentials - - # Enable 6Gb of persistent storage models and prompt templates - persistence: - enabled: true - size: 6Gi - -service: - type: ClusterIP - annotations: {} - # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout - # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200" -``` -
- -## Supported OpenAI API endpoints - -You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). - -Following the list of endpoints/parameters supported. - -Note: - -- You can also specify the model as part of the OpenAI token. -- If only one model is available, the API will use it for all the requests. - -### Chat completions - -
-For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body: - -``` -curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "ggml-koala-7b-model-q4_0-r2.bin", - "messages": [{"role": "user", "content": "Say this is a test!"}], - "temperature": 0.7 - }' -``` - -Available additional parameters: `top_p`, `top_k`, `max_tokens` -
- -### Edit completions - -
-To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body: - -``` -curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{ - "model": "ggml-koala-7b-model-q4_0-r2.bin", - "instruction": "rephrase", - "input": "Black cat jumped out of the window", - "temperature": 0.7 - }' -``` - -Available additional parameters: `top_p`, `top_k`, `max_tokens`. - -
- -### Completions - -
- -To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body: - -``` -curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "ggml-koala-7b-model-q4_0-r2.bin", - "prompt": "A long time ago in a galaxy far, far away", - "temperature": 0.7 - }' -``` - -Available additional parameters: `top_p`, `top_k`, `max_tokens` - -
- -### List models - -
-You can list all the models available with: - -``` -curl http://localhost:8080/v1/models -``` - -
- -### Embeddings - -OpenAI docs: https://platform.openai.com/docs/api-reference/embeddings - -
- -The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example: - -```yaml -name: text-embedding-ada-002 -parameters: - model: bert -embeddings: true -backend: "bert-embeddings" -``` - -There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/). - -Note: embeddings is supported only with `llama.cpp` compatible models and `bert` models. bert is more performant and available independently of the LLM model. - -
- -### Transcriptions endpoint - -
- -Note: requires ffmpeg in the container image, which is currently not shipped due to licensing issues. We will prepare separated images with ffmpeg. (stay tuned!) - -Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tree/main in the `models` folder, and create a YAML file for your model: - -```yaml -name: whisper-1 -backend: whisper -parameters: - model: whisper-en -``` - -The transcriptions endpoint then can be tested like so: -``` -wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - -curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@$PWD/gb1.ogg" -F model="whisper-1" - -{"text":"My fellow Americans, this day has brought terrible news and great sadness to our country.At nine o'clock this morning, Mission Control in Houston lost contact with our Space ShuttleColumbia.A short time later, debris was seen falling from the skies above Texas.The Columbia's lost.There are no survivors.One board was a crew of seven.Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain DavidBrown, Commander William McCool, Dr. 
Kultna Shavla, and Elon Ramon, a colonel in the IsraeliAir Force.These men and women assumed great risk in the service to all humanity.In an age when spaceflight has come to seem almost routine, it is easy to overlook thedangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere ofthe Earth.These astronauts knew the dangers, and they faced them willingly, knowing they had a highand noble purpose in life.Because of their courage and daring and idealism, we will miss them all the more.All Americans today are thinking as well of the families of these men and women who havebeen given this sudden shock and grief.You're not alone.Our entire nation agrees with you, and those you loved will always have the respect andgratitude of this country.The cause in which they died will continue.Mankind has led into the darkness beyond our world by the inspiration of discovery andthe longing to understand.Our journey into space will go on.In the skies today, we saw destruction and tragedy.As farther than we can see, there is comfort and hope.In the words of the prophet Isaiah, \"Lift your eyes and look to the heavens who createdall these, he who brings out the starry hosts one by one and calls them each by name.\"Because of his great power and mighty strength, not one of them is missing.The same creator who names the stars also knows the names of the seven souls we mourntoday.The crew of the shuttle Columbia did not return safely to Earth yet we can pray that all aresafely home.May God bless the grieving families and may God continue to bless America.[BLANK_AUDIO]"} -``` - -
- -### Image generation - -OpenAI docs: https://platform.openai.com/docs/api-reference/images/create - -LocalAI supports generating images with Stable diffusion, running on CPU. - -| mode=0 | mode=1 (winograd/sgemm) | -|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| -| ![test](https://github.com/go-skynet/LocalAI/assets/2420543/7145bdee-4134-45bb-84d4-f11cb08a5638) | ![b643343452981](https://github.com/go-skynet/LocalAI/assets/2420543/abf14de1-4f50-4715-aaa4-411d703a942a) | -| ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) | ![winograd2](https://github.com/go-skynet/LocalAI/assets/2420543/1935a69a-ecce-4afc-a099-1ac28cb649b3) | -| ![winograd](https://github.com/go-skynet/LocalAI/assets/2420543/1979a8c4-a70d-4602-95ed-642f382f6c6a) | ![winograd3](https://github.com/go-skynet/LocalAI/assets/2420543/e6d184d4-5002-408f-b564-163986e1bdfb) | - -
- -To generate an image you can send a POST request to the `/v1/images/generations` endpoint with the instruction as the request body: - -```bash -# 512x512 is supported too -curl http://localhost:8080/v1/images/generations -H "Content-Type: application/json" -d '{ - "prompt": "A cute baby sea otter", - "size": "256x256" - }' -``` - -Available additional parameters: `mode`, `step`. - -Note: To set a negative prompt, you can split the prompt with `|`, for instance: `a cute baby sea otter|malformed`. - -```bash -curl http://localhost:8080/v1/images/generations -H "Content-Type: application/json" -d '{ - "prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text", - "size": "256x256" - }' -``` - -Note: image generator supports images up to 512x512. You can use other tools however to upscale the image, for instance: https://github.com/upscayl/upscayl. - -#### Setup - -Note: In order to use the `images/generation` endpoint, you need to build LocalAI with `GO_TAGS=stablediffusion`. - -1. Create a model file `stablediffusion.yaml` in the models folder: - -```yaml -name: stablediffusion -backend: stablediffusion -asset_dir: stablediffusion_assets -``` -2. Create a `stablediffusion_assets` directory inside your `models` directory -3. Download the ncnn assets from https://github.com/EdVince/Stable-Diffusion-NCNN#out-of-box and place them in `stablediffusion_assets`. 
- -The models directory should look like the following: - -``` -models -β”œβ”€β”€ stablediffusion_assets -β”‚Β Β  β”œβ”€β”€ AutoencoderKL-256-256-fp16-opt.param -β”‚Β Β  β”œβ”€β”€ AutoencoderKL-512-512-fp16-opt.param -β”‚Β Β  β”œβ”€β”€ AutoencoderKL-base-fp16.param -β”‚Β Β  β”œβ”€β”€ AutoencoderKL-encoder-512-512-fp16.bin -β”‚Β Β  β”œβ”€β”€ AutoencoderKL-fp16.bin -β”‚Β Β  β”œβ”€β”€ FrozenCLIPEmbedder-fp16.bin -β”‚Β Β  β”œβ”€β”€ FrozenCLIPEmbedder-fp16.param -β”‚Β Β  β”œβ”€β”€ log_sigmas.bin -β”‚Β Β  β”œβ”€β”€ tmp-AutoencoderKL-encoder-256-256-fp16.param -β”‚Β Β  β”œβ”€β”€ UNetModel-256-256-MHA-fp16-opt.param -β”‚Β Β  β”œβ”€β”€ UNetModel-512-512-MHA-fp16-opt.param -β”‚Β Β  β”œβ”€β”€ UNetModel-base-MHA-fp16.param -β”‚Β Β  β”œβ”€β”€ UNetModel-MHA-fp16.bin -β”‚Β Β  └── vocab.txt -└── stablediffusion.yaml -``` - -
- -## LocalAI API endpoints - -Besides the OpenAI endpoints, there are additional LocalAI-only API endpoints. - -### Applying a model - `/models/apply` - -This endpoint can be used to install a model in runtime. - -
- -LocalAI will create a batch process that downloads the required files from a model definition and automatically reload itself to include the new model. - -Input: `url`, `name` (optional), `files` (optional) - -```bash -curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{ - "url": "", - "name": "", - "files": [ - { - "uri": "", - "sha256": "", - "filename": "" - }, - "overrides": { "backend": "...", "f16": true } - ] - } -``` - -An optional, list of additional files can be specified to be downloaded within `files`. The `name` allows to override the model name. Finally it is possible to override the model config file with `override`. - -Returns an `uuid` and an `url` to follow up the state of the process: - -```json -{ "uuid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"} -``` - -To see a collection example of curated models definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery). - -
- -### Inquiry model job state `/models/jobs/` - -This endpoint returns the state of the batch job associated with a model -
- -This endpoint can be used with the uuid returned by `/models/apply` to check a job state: - -```bash -curl http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58 -``` - -Returns a json containing the error, and if the job is being processed: - -```json -{"error":null,"processed":true,"message":"completed"} -``` - -
- -## Clients - -OpenAI clients are already compatible with LocalAI by overriding the basePath, or the target URL. - -## Javascript - -
- -https://github.com/openai/openai-node/ - -```javascript -import { Configuration, OpenAIApi } from 'openai'; - -const configuration = new Configuration({ - basePath: `http://localhost:8080/v1` -}); -const openai = new OpenAIApi(configuration); -``` - -
- -## Python - -
- -https://github.com/openai/openai-python - -Set the `OPENAI_API_BASE` environment variable, or by code: - -```python -import openai - -openai.api_base = "http://localhost:8080/v1" - -# create a chat completion -chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}]) - -# print the completion -print(completion.choices[0].message.content) -``` - -
+See the [list of the supported API endpoints](https://localai.io/api-endpoints/index.html) and how to configure image generation and audio transcription. ## Frequently asked questions -Here are answers to some of the most common questions. - - -### How do I get models? - -
- -Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, or models from gpt4all should also work: https://github.com/nomic-ai/gpt4all. - -
- -### What's the difference with Serge, or XXX? - - -
- -LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, making it easy to set up locally and deploy to Kubernetes. - -
- - -### Can I use it with a Discord bot, or XXX? - -
- -Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows you to use it with every application that was supposed to work with OpenAI, but without changing the application! - -
- - -### Can this leverage GPUs? - -
- -There is partial GPU support, see build instructions above. - -
- -### Where is the webUI? - -
-localai-webui and chatbot-ui are available in the examples section and can be set up as per the instructions. However, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several already on GitHub, and they should be compatible with LocalAI already (as it mimics the OpenAI API) - -
- -### Does it work with AutoGPT? - -
- -Yes, see the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)! - -
+See [the FAQ](https://localai.io/faq/index.html) section for a list of common questions. ## Projects already using LocalAI to run local models @@ -1058,17 +189,13 @@ Feel free to open up a PR to get your project listed! ## License -LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). +LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/). MIT -## Golang bindings used +## Author -- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) -- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp) -- [go-skynet/go-ggml-transformers.cpp](https://github.com/go-skynet/go-ggml-transformers.cpp) -- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp) -- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp) +Ettore Di Giacinto and others ## Acknowledgements From 2abdac7003437267b72c1a56884789ce7778ddf9 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Mon, 29 May 2023 23:09:42 +0200 Subject: [PATCH 066/137] :arrow_up: Update go-skynet/bloomz.cpp (#417) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4c5bb0de..ca33fd71 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp RWKV_VERSION?=dcbd34aff983b3d04fa300c5da5ec4bfdf6db295 WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 -BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1 +BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= CGO_LDFLAGS?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ From 04d6bd7922a506dd899b74217029201db5a272fb Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" 
<105103991+ci-robbot@users.noreply.github.com> Date: Mon, 29 May 2023 23:10:43 +0200 Subject: [PATCH 067/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#421) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ca33fd71..8a0f62aa 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b -GOGGMLTRANSFORMERS_VERSION?=4f18e5eb75089dc1fc8f1c955bb8f73d18520a46 +GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6 RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp RWKV_VERSION?=dcbd34aff983b3d04fa300c5da5ec4bfdf6db295 WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 From 171b50bb1ceb88af9d06a032f4bf1268191a444a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Prud=27homme?= Date: Mon, 29 May 2023 23:11:29 +0200 Subject: [PATCH 068/137] ci: fix typo in variable name (#424) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SΓ©bastien Prud'homme --- .github/workflows/release.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ea49088b..6d564657 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -20,7 +20,7 @@ jobs: steps: - name: Clone uses: actions/checkout@v3 - with: + with: submodules: true - name: Dependencies run: | @@ -29,7 +29,7 @@ jobs: - name: Build id: build env: - CMAKE_ARGS: "${{ matrix.define }}" + CMAKE_ARGS: "${{ matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | make dist @@ -58,7 +58,7 @@ jobs: steps: - name: Clone uses: actions/checkout@v3 - with: + with: submodules: true - name: Dependencies @@ -68,7 +68,7 @@ jobs: - name: Build id: build env: - 
CMAKE_ARGS: "${{ matrix.define }}" + CMAKE_ARGS: "${{ matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | make dist From 2272324fd610eaee079a0b5073916aea4620b282 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Prud=27homme?= Date: Mon, 29 May 2023 23:12:27 +0200 Subject: [PATCH 069/137] feat: add CuBLAS support in Docker images (#403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SΓ©bastien Prud'homme --- Dockerfile | 48 ++++++++++++++++++++++++++++----- Dockerfile.dev | 73 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 107 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1f3830d6..d730d4b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,49 @@ ARG GO_VERSION=1.20 -ARG BUILD_TYPE= FROM golang:$GO_VERSION -ENV REBUILD=true -WORKDIR /build -RUN apt-get update && apt-get install -y cmake curl libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 ca-certificates -COPY . . 
-RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2 -RUN make build + +ARG BUILD_TYPE= +ARG GO_TAGS= +ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MINOR_VERSION=7 + +ENV BUILD_TYPE=${BUILD_TYPE} +ENV GO_TAGS=${GO_TAGS} +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility +ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" +ENV NVIDIA_VISIBLE_DEVICES=all ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz +ENV REBUILD=true + +WORKDIR /build + +RUN apt-get update && \ + apt-get install -y ca-certificates cmake curl + +# CuBLAS requirements +RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ + apt-get install -y software-properties-common && \ + apt-add-repository contrib && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm -f cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + ; fi +ENV PATH /usr/local/cuda/bin:${PATH} + +# OpenBLAS requirements +RUN apt-get install -y libopenblas-dev + +# Stable Diffusion requirements +RUN apt-get install -y libopencv-dev && \ + ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + +COPY . . 
+RUN make build + # Define the health check command HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 + EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] diff --git a/Dockerfile.dev b/Dockerfile.dev index 1e355f1b..df44359a 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,20 +1,79 @@ ARG GO_VERSION=1.20 ARG DEBIAN_VERSION=11 -ARG BUILD_TYPE= - FROM golang:$GO_VERSION as builder + +ARG BUILD_TYPE= +ARG GO_TAGS= +ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MINOR_VERSION=7 + +ENV BUILD_TYPE=${BUILD_TYPE} +ENV GO_TAGS=${GO_TAGS} + WORKDIR /build -RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 -RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2 + +RUN apt-get update && \ + apt-get install -y cmake + +# CuBLAS requirements +RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ + apt-get install -y software-properties-common && \ + apt-add-repository contrib && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm -f cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + ; fi +ENV PATH /usr/local/cuda/bin:${PATH} + +# OpenBLAS requirements +RUN apt-get install -y libopenblas-dev + +# Stable Diffusion requirements +RUN apt-get install -y libopencv-dev && \ + ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + COPY . . 
RUN make build FROM debian:$DEBIAN_VERSION -COPY --from=builder /build/local-ai /usr/bin/local-ai -RUN apt-get update && apt-get install -y ca-certificates curl + +ARG BUILD_TYPE= +ARG GO_TAGS= +ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MINOR_VERSION=7 + +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility +ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" +ENV NVIDIA_VISIBLE_DEVICES=all ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz + +RUN apt-get update && \ + apt-get install -y ca-certificates curl + +# CuBLAS requirements +RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ + apt-get install -y curl software-properties-common && \ + apt-add-repository contrib && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm -f cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y cuda-cudart-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + ; fi + +# OpenBLAS requirements +RUN apt-get install -y libopenblas0 + +# Stable Diffusion requirements +RUN apt-get install -y libgomp1 libopencv-core4.5 libopencv-imgcodecs4.5 + +COPY --from=builder /build/local-ai /usr/bin/local-ai + # Define the health check command HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 + EXPOSE 8080 -ENTRYPOINT [ "/usr/bin/local-ai" ] \ No newline at end of file +ENTRYPOINT [ "/usr/bin/local-ai" ] From 2c918378652522648bc45dd6b5410c5e1713d9ae Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 May 2023 23:13:42 +0200 Subject: [PATCH 070/137] docs: fix link (#426) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3560f1ca..f3777af7 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ In a nutshell: LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, 
focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! -| [ChatGPT OSS alternative](https://github.com/go-skynet/LocalAI/tree/update_docs_2/examples/chatbot-ui) | [Image generation](https://localai.io/api-endpoints/index.html#image-generation) | +| [ChatGPT OSS alternative](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) | [Image generation](https://localai.io/api-endpoints/index.html#image-generation) | |------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) | ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) | From ca9115d6d083c0787430f9a2f91026a37026da1f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 11:34:13 +0200 Subject: [PATCH 071/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 13ccc22 (#427) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c1ea9b08..541c7a13 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 
diff --git a/go.sum b/go.sum index cbe69f11..bc6880ea 100644 --- a/go.sum +++ b/go.sum @@ -62,6 +62,8 @@ github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 h1:0VZ5NbrtqvLvBRs0ioXBb9Mp8cOYRqG2WgAIf3+3dlw= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14/go.mod h1:Rz967+t+aY6S+TBiW/WI8FM/C1WEMM+DamSMtKRxVAM= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb h1:slNlMT8xB6w0QaMroTsqkNzNovUOEkpNpCawB7IjBFY= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb/go.mod h1:SI+oF2+THMydq8Vo4+EzKJaQwtfWOy+lr7yWPP6FR2U= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From aacb96df7afbd63e1c44f5e0dce580a4714634ec Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 12:00:30 +0200 Subject: [PATCH 072/137] fix: correctly handle errors from App constructor (#430) Signed-off-by: mudler --- Dockerfile | 2 +- Dockerfile.dev | 2 +- api/api.go | 8 ++++---- main.go | 9 +++++++-- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index d730d4b4..60c30669 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,7 @@ COPY . . 
RUN make build # Define the health check command -HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ +HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 EXPOSE 8080 diff --git a/Dockerfile.dev b/Dockerfile.dev index df44359a..bc4a3377 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -72,7 +72,7 @@ RUN apt-get install -y libgomp1 libopencv-core4.5 libopencv-imgcodecs4.5 COPY --from=builder /build/local-ai /usr/bin/local-ai # Define the health check command -HEALTHCHECK --interval=30s --timeout=360s --retries=10 \ +HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 EXPOSE 8080 diff --git a/api/api.go b/api/api.go index dd5f3022..fb3addca 100644 --- a/api/api.go +++ b/api/api.go @@ -11,7 +11,7 @@ import ( "github.com/rs/zerolog/log" ) -func App(opts ...AppOption) *fiber.App { +func App(opts ...AppOption) (*fiber.App, error) { options := newOptions(opts...) zerolog.SetGlobalLevel(zerolog.InfoLevel) @@ -71,13 +71,13 @@ func App(opts ...AppOption) *fiber.App { if options.preloadJSONModels != "" { if err := ApplyGalleryFromString(options.loader.ModelPath, options.preloadJSONModels, cm); err != nil { - return nil + return nil, err } } if options.preloadModelsFromPath != "" { if err := ApplyGalleryFromFile(options.loader.ModelPath, options.preloadModelsFromPath, cm); err != nil { - return nil + return nil, err } } @@ -138,5 +138,5 @@ func App(opts ...AppOption) *fiber.App { app.Get("/v1/models", listModels(options.loader, cm)) app.Get("/models", listModels(options.loader, cm)) - return app + return app, nil } diff --git a/main.go b/main.go index f391affc..bdf95db7 100644 --- a/main.go +++ b/main.go @@ -111,7 +111,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. 
Copyright: "go-skynet authors", Action: func(ctx *cli.Context) error { fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path")) - return api.App( + app, err := api.App( api.WithConfigFile(ctx.String("config-file")), api.WithJSONStringPreload(ctx.String("preload-models")), api.WithYAMLConfigPreload(ctx.String("preload-models-config")), @@ -124,7 +124,12 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. api.WithCors(ctx.Bool("cors")), api.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), api.WithThreads(ctx.Int("threads")), - api.WithUploadLimitMB(ctx.Int("upload-limit"))).Listen(ctx.String("address")) + api.WithUploadLimitMB(ctx.Int("upload-limit"))) + if err != nil { + return err + } + + return app.Listen(ctx.String("address")) }, } From ff8295a97cb6da081f4248e5124822ec21455e8a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 12:22:25 +0200 Subject: [PATCH 073/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 4afcaf2 (#428) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 541c7a13..1b26b102 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb - github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 + github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index bc6880ea..19c11ae9 100644 --- a/go.sum +++ b/go.sum @@ -74,6 +74,8 @@ github.com/go-skynet/go-llama.cpp 
v0.0.0-20230524233806-6e7e69a1607e h1:zfxPbHj7 github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 h1:AbKnkgzkjkyoJtjOHgR3+rmNKOOjmRja6De3HEa7S7E= github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f h1:HmXiNF9Sy+34aSjaJ2/JN+goDgbT2XyLjdiG2EOMvaE= +github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From f401181cb5991fcce697749705864d1f99f13138 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 12:35:32 +0200 Subject: [PATCH 074/137] fix: switch back to upstream for rwkv bindings (#432) --- Makefile | 4 ++-- api/api_test.go | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8a0f62aa..2d040e35 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6 -RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp -RWKV_VERSION?=dcbd34aff983b3d04fa300c5da5ec4bfdf6db295 +RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp +RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f diff --git a/api/api_test.go b/api/api_test.go index 4b245143..2fa7e43b 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -114,7 +114,8 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(tmpdir) c, cancel = context.WithCancel(context.Background()) - app = App(WithContext(c), WithModelLoader(modelLoader)) + app, err = App(WithContext(c), WithModelLoader(modelLoader)) + Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -198,7 +199,9 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) c, cancel = context.WithCancel(context.Background()) - app = App(WithContext(c), WithModelLoader(modelLoader)) + var err error + app, err = App(WithContext(c), WithModelLoader(modelLoader)) + Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -316,7 +319,9 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) c, cancel = context.WithCancel(context.Background()) - app = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE"))) + var err error + app, err = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE"))) + Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") From 49ce24984c87fe7bcf55d593a53aad37b49a81e4 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Tue, 30 May 2023 21:01:55 +1000 Subject: [PATCH 075/137] feat: Add more test-cases and remove dev container (#433) Signed-off-by: Aisuko Co-authored-by: Ettore Di Giacinto --- .devcontainer/Dockerfile | 3 --- .devcontainer/devcontainer.json | 46 -------------------------------- .devcontainer/docker-compose.yml | 26 ------------------ .gitignore | 4 +++ .vscode/launch.json | 2 +- Makefile | 1 + api/config_test.go | 27 
+++++++++++++++++++ 7 files changed, 33 insertions(+), 76 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .devcontainer/docker-compose.yml create mode 100644 api/config_test.go diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index cae178f8..00000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -ARG GO_VERSION=1.20 -FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye -RUN apt-get update && apt-get install -y cmake diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index c36f7299..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,46 +0,0 @@ -// For format details, see https://aka.ms/devcontainer.json. For config options, see the -// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose -{ - "name": "Existing Docker Compose (Extend)", - - // Update the 'dockerComposeFile' list if you have more compose files or use different names. - // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. - "dockerComposeFile": [ - "../docker-compose.yaml", - "docker-compose.yml" - ], - - // The 'service' property is the name of the service for the container that VS Code should - // use. Update this value and .devcontainer/docker-compose.yml to the real service name. - "service": "api", - - // The optional 'workspaceFolder' property is the path VS Code should open by default when - // connected. This is typically a file mount in .devcontainer/docker-compose.yml - "workspaceFolder": "/workspace", - - "features": { - "ghcr.io/devcontainers/features/go:1": {}, - "ghcr.io/azutake/devcontainer-features/go-packages-install:0": {} - }, - - // Features to add to the dev container. More info: https://containers.dev/features. 
- // "features": {}, - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - - // Uncomment the next line if you want start specific services in your Docker Compose config. - // "runServices": [], - - // Uncomment the next line if you want to keep your containers running after VS Code shuts down. - // "shutdownAction": "none", - - // Uncomment the next line to run commands after the container is created. - "postCreateCommand": "make prepare" - - // Configure tool-specific properties. - // "customizations": {}, - - // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "devcontainer" -} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml deleted file mode 100644 index 265e5189..00000000 --- a/.devcontainer/docker-compose.yml +++ /dev/null @@ -1,26 +0,0 @@ -version: '3.6' -services: - # Update this to the name of the service you want to work with in your docker-compose.yml file - api: - # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer - # folder. Note that the path of the Dockerfile and context is relative to the *primary* - # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile" - # array). The sample below assumes your primary file is in the root of your project. - # - build: - context: . - dockerfile: .devcontainer/Dockerfile - - volumes: - # Update this to wherever you want VS Code to mount the folder of your project - - .:/workspace:cached - - # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. - # cap_add: - # - SYS_PTRACE - # security_opt: - # - seccomp:unconfined - - # Overrides default command so things don't shut down after the process ends. 
- command: /bin/sh -c "while sleep 1000; do :; done" - diff --git a/.gitignore b/.gitignore index 10f3a35e..878047ee 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,12 @@ go-llama gpt4all go-stable-diffusion +go-ggml-transformers go-gpt2 go-rwkv whisper.cpp +bloomz +go-bert # LocalAI build binary LocalAI @@ -15,6 +18,7 @@ local-ai # Ignore models models/* test-models/ +test-dir/ release/ diff --git a/.vscode/launch.json b/.vscode/launch.json index cf4fb924..2727da92 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,7 +25,7 @@ ], "env": { "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "LIBRARY_PATH": "$${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", + "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", "DEBUG": "true" } } diff --git a/Makefile b/Makefile index 2d040e35..0eb790d4 100644 --- a/Makefile +++ b/Makefile @@ -193,6 +193,7 @@ prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL clean: ## Remove build related file rm -fr ./go-llama rm -rf ./gpt4all + rm -rf ./go-gpt2 rm -rf ./go-stable-diffusion rm -rf ./go-ggml-transformers rm -rf ./go-rwkv diff --git a/api/config_test.go b/api/config_test.go new file mode 100644 index 00000000..9950f803 --- /dev/null +++ b/api/config_test.go @@ -0,0 +1,27 @@ +package api + +import ( + "os" + + . 
"github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Test cases for config related functions", func() { + + var ( + configFile string + ) + + Context("Test Read configuration functions", func() { + configFile = os.Getenv("CONFIG_FILE") + It("Test ReadConfigFile", func() { + config, err := ReadConfigFile(configFile) + Expect(err).To(BeNil()) + Expect(config).ToNot(BeNil()) + // two configs in config.yaml + Expect(len(config)).To(Equal(2)) + }) + + }) +}) From 6d71dd7d988934e110cdb26d21d0d419df73d61b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 15:53:37 +0200 Subject: [PATCH 076/137] fix: do not build from the same container (#434) Signed-off-by: mudler --- Dockerfile | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- entrypoint.sh | 4 +++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 60c30669..2401e95b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ ARG GO_VERSION=1.20 -FROM golang:$GO_VERSION + +FROM golang:$GO_VERSION as builder ARG BUILD_TYPE= ARG GO_TAGS= @@ -41,6 +42,53 @@ RUN apt-get install -y libopencv-dev && \ COPY . . 
RUN make build +FROM golang:$GO_VERSION + +ARG BUILD_TYPE= +ARG GO_TAGS= +ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MINOR_VERSION=7 + +ENV BUILD_TYPE=${BUILD_TYPE} +ENV GO_TAGS=${GO_TAGS} +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility +ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" +ENV NVIDIA_VISIBLE_DEVICES=all +ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz +ENV HEALTHCHECK_TIMEOUT=10m +ENV HEALTHCHECK_INTERVAL=10m +ENV HEALTHCHECK_RETRIES=10m + +ENV REBUILD=true + +WORKDIR /build + +RUN apt-get update && \ + apt-get install -y ca-certificates cmake curl + +# CuBLAS requirements +RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ + apt-get install -y software-properties-common && \ + apt-add-repository contrib && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm -f cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + ; fi +ENV PATH /usr/local/cuda/bin:${PATH} + +# OpenBLAS requirements +RUN apt-get install -y libopenblas-dev + +# Stable Diffusion requirements +RUN apt-get install -y libopencv-dev && \ + ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + +COPY . . 
+RUN make prepare-sources +COPY --from=builder /build/local-ai ./ + # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 diff --git a/entrypoint.sh b/entrypoint.sh index e7390e56..28c40454 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,9 +1,11 @@ #!/bin/bash +set -e cd /build if [ "$REBUILD" != "false" ]; then - make rebuild + rm -rf ./local-ai + make build fi ./local-ai "$@" \ No newline at end of file From 577d36b5964ba0873b945f2ea00404ed52556408 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 15:58:10 +0200 Subject: [PATCH 077/137] images: cleanup, drop .dev Dockerfile (#437) Signed-off-by: mudler --- Dockerfile | 3 - Dockerfile.dev | 79 ------------------- docker-compose.yaml | 2 +- examples/chatbot-ui/docker-compose.yaml | 2 +- examples/discord-bot/docker-compose.yaml | 2 +- examples/langchain-python/docker-compose.yaml | 2 +- examples/langchain/docker-compose.yaml | 2 +- examples/rwkv/docker-compose.yaml | 2 +- examples/slack-bot/docker-compose.yaml | 2 +- 9 files changed, 7 insertions(+), 89 deletions(-) delete mode 100644 Dockerfile.dev diff --git a/Dockerfile b/Dockerfile index 2401e95b..f7fdbb8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,9 +55,6 @@ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz -ENV HEALTHCHECK_TIMEOUT=10m -ENV HEALTHCHECK_INTERVAL=10m -ENV HEALTHCHECK_RETRIES=10m ENV REBUILD=true diff --git a/Dockerfile.dev b/Dockerfile.dev deleted file mode 100644 index bc4a3377..00000000 --- a/Dockerfile.dev +++ /dev/null @@ -1,79 +0,0 @@ -ARG GO_VERSION=1.20 -ARG DEBIAN_VERSION=11 -FROM golang:$GO_VERSION as builder - -ARG BUILD_TYPE= -ARG GO_TAGS= -ARG CUDA_MAJOR_VERSION=11 -ARG CUDA_MINOR_VERSION=7 - -ENV BUILD_TYPE=${BUILD_TYPE} -ENV GO_TAGS=${GO_TAGS} - -WORKDIR /build - -RUN apt-get update && \ 
- apt-get install -y cmake - -# CuBLAS requirements -RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ - apt-get install -y software-properties-common && \ - apt-add-repository contrib && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - rm -f cuda-keyring_1.0-1_all.deb && \ - apt-get update && \ - apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - ; fi -ENV PATH /usr/local/cuda/bin:${PATH} - -# OpenBLAS requirements -RUN apt-get install -y libopenblas-dev - -# Stable Diffusion requirements -RUN apt-get install -y libopencv-dev && \ - ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - -COPY . . -RUN make build - -FROM debian:$DEBIAN_VERSION - -ARG BUILD_TYPE= -ARG GO_TAGS= -ARG CUDA_MAJOR_VERSION=11 -ARG CUDA_MINOR_VERSION=7 - -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz - -RUN apt-get update && \ - apt-get install -y ca-certificates curl - -# CuBLAS requirements -RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ - apt-get install -y curl software-properties-common && \ - apt-add-repository contrib && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - rm -f cuda-keyring_1.0-1_all.deb && \ - apt-get update && \ - apt-get install -y cuda-cudart-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ - ; fi - -# OpenBLAS requirements -RUN apt-get install -y libopenblas0 - -# Stable Diffusion requirements -RUN apt-get install -y libgomp1 libopencv-core4.5 libopencv-imgcodecs4.5 - -COPY --from=builder /build/local-ai /usr/bin/local-ai - -# Define the health check command -HEALTHCHECK 
--interval=1m --timeout=10m --retries=10 \ - CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 - -EXPOSE 8080 -ENTRYPOINT [ "/usr/bin/local-ai" ] diff --git a/docker-compose.yaml b/docker-compose.yaml index f7d76538..6217b9f6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: . - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 env_file: diff --git a/examples/chatbot-ui/docker-compose.yaml b/examples/chatbot-ui/docker-compose.yaml index a5f94aca..c7782c34 100644 --- a/examples/chatbot-ui/docker-compose.yaml +++ b/examples/chatbot-ui/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 environment: diff --git a/examples/discord-bot/docker-compose.yaml b/examples/discord-bot/docker-compose.yaml index 6a10f306..e7ee6b4c 100644 --- a/examples/discord-bot/docker-compose.yaml +++ b/examples/discord-bot/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 environment: diff --git a/examples/langchain-python/docker-compose.yaml b/examples/langchain-python/docker-compose.yaml index ed3eaec6..8bd61b5f 100644 --- a/examples/langchain-python/docker-compose.yaml +++ b/examples/langchain-python/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 environment: diff --git a/examples/langchain/docker-compose.yaml b/examples/langchain/docker-compose.yaml index cafcb490..32564fef 100644 --- a/examples/langchain/docker-compose.yaml +++ b/examples/langchain/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + 
dockerfile: Dockerfile ports: - 8080:8080 environment: diff --git a/examples/rwkv/docker-compose.yaml b/examples/rwkv/docker-compose.yaml index ed3eaec6..8bd61b5f 100644 --- a/examples/rwkv/docker-compose.yaml +++ b/examples/rwkv/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 environment: diff --git a/examples/slack-bot/docker-compose.yaml b/examples/slack-bot/docker-compose.yaml index 35a7501a..12cb70b0 100644 --- a/examples/slack-bot/docker-compose.yaml +++ b/examples/slack-bot/docker-compose.yaml @@ -5,7 +5,7 @@ services: image: quay.io/go-skynet/local-ai:latest build: context: ../../ - dockerfile: Dockerfile.dev + dockerfile: Dockerfile ports: - 8080:8080 environment: From 11af09faf37e97cfaccd824e3863b4263f543fe6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 18:29:28 +0200 Subject: [PATCH 078/137] examples: use gallery in chatbot-ui, add flowise (#438) Signed-off-by: mudler --- README.md | 1 + examples/README.md | 8 ++++ examples/chatbot-ui/README.md | 12 ++---- examples/chatbot-ui/docker-compose.yaml | 15 +++++++- examples/chatbot-ui/models/completion.tmpl | 1 - examples/chatbot-ui/models/gpt-3.5-turbo.yaml | 16 -------- examples/chatbot-ui/models/gpt4all.tmpl | 4 -- examples/flowise/README.md | 26 +++++++++++++ examples/flowise/docker-compose.yaml | 37 +++++++++++++++++++ 9 files changed, 90 insertions(+), 30 deletions(-) delete mode 100644 examples/chatbot-ui/models/completion.tmpl delete mode 100644 examples/chatbot-ui/models/gpt-3.5-turbo.yaml delete mode 100644 examples/chatbot-ui/models/gpt4all.tmpl create mode 100644 examples/flowise/README.md create mode 100644 examples/flowise/docker-compose.yaml diff --git a/README.md b/README.md index f3777af7..ae312d4f 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ Feel free to open up a PR to get your project listed! 
- [Spark](https://github.com/cedriking/spark) - [autogpt4all](https://github.com/aorumbayev/autogpt4all) - [Mods](https://github.com/charmbracelet/mods) +- [Flowise](https://github.com/FlowiseAI/Flowise) ## Short-term roadmap diff --git a/examples/README.md b/examples/README.md index 0229441b..e3257cdb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -22,6 +22,14 @@ This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https:/ [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) +### Flowise + +_by [@mudler](https://github.com/mudler)_ + +This example shows how to use [FlowiseAI/Flowise](https://github.com/FlowiseAI/Flowise) with LocalAI. + +[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/flowise/) + ### Discord bot _by [@mudler](https://github.com/mudler)_ diff --git a/examples/chatbot-ui/README.md b/examples/chatbot-ui/README.md index 7cf4bbb2..fa7ee368 100644 --- a/examples/chatbot-ui/README.md +++ b/examples/chatbot-ui/README.md @@ -4,22 +4,18 @@ Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywr ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) -## Setup +## Run +In this example LocalAI will download the gpt4all model and set it up as "gpt-3.5-turbo". 
See the `docker-compose.yaml` ```bash # Clone LocalAI git clone https://github.com/go-skynet/LocalAI cd LocalAI/examples/chatbot-ui -# (optional) Checkout a specific LocalAI tag -# git checkout -b build - -# Download gpt4all-j to models/ -wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j - # start with docker-compose -docker-compose up -d --pull always +docker-compose up --pull always + # or you can build the images with: # docker-compose up -d --build ``` diff --git a/examples/chatbot-ui/docker-compose.yaml b/examples/chatbot-ui/docker-compose.yaml index c7782c34..27b4f4e0 100644 --- a/examples/chatbot-ui/docker-compose.yaml +++ b/examples/chatbot-ui/docker-compose.yaml @@ -3,6 +3,14 @@ version: '3.6' services: api: image: quay.io/go-skynet/local-ai:latest + # As initially LocalAI will download the models defined in PRELOAD_MODELS + # you might need to tweak the healthcheck values here according to your network connection. + # Here we give a timespan of 20m to download all the required files. + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] + interval: 1m + timeout: 20m + retries: 20 build: context: ../../ dockerfile: Dockerfile @@ -11,11 +19,16 @@ services: environment: - DEBUG=true - MODELS_PATH=/models + # You can preload different models here as well. 
+ # See: https://github.com/go-skynet/model-gallery + - 'PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}]' volumes: - ./models:/models:cached command: ["/usr/bin/local-ai" ] - chatgpt: + depends_on: + api: + condition: service_healthy image: ghcr.io/mckaywrigley/chatbot-ui:main ports: - 3000:3000 diff --git a/examples/chatbot-ui/models/completion.tmpl b/examples/chatbot-ui/models/completion.tmpl deleted file mode 100644 index 9867cfcd..00000000 --- a/examples/chatbot-ui/models/completion.tmpl +++ /dev/null @@ -1 +0,0 @@ -{{.Input}} \ No newline at end of file diff --git a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml b/examples/chatbot-ui/models/gpt-3.5-turbo.yaml deleted file mode 100644 index 5c192f5d..00000000 --- a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: gpt-3.5-turbo -parameters: - model: ggml-gpt4all-j - top_k: 80 - temperature: 0.2 - top_p: 0.7 -context_size: 1024 -stopwords: -- "HUMAN:" -- "GPT:" -roles: - user: " " - system: " " -template: - completion: completion - chat: gpt4all \ No newline at end of file diff --git a/examples/chatbot-ui/models/gpt4all.tmpl b/examples/chatbot-ui/models/gpt4all.tmpl deleted file mode 100644 index f76b080a..00000000 --- a/examples/chatbot-ui/models/gpt4all.tmpl +++ /dev/null @@ -1,4 +0,0 @@ -The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. -### Prompt: -{{.Input}} -### Response: diff --git a/examples/flowise/README.md b/examples/flowise/README.md new file mode 100644 index 00000000..d8bb13d4 --- /dev/null +++ b/examples/flowise/README.md @@ -0,0 +1,26 @@ +# flowise + +Example of integration with [FlowiseAI/Flowise](https://github.com/FlowiseAI/Flowise). 
+ +![Screenshot from 2023-05-30 18-01-03](https://github.com/go-skynet/LocalAI/assets/2420543/02458782-0549-4131-971c-95ee56ec1af8) + +You can check a demo video in the Flowise PR: https://github.com/FlowiseAI/Flowise/pull/123 + +## Run + +In this example LocalAI will download the gpt4all model and set it up as "gpt-3.5-turbo". See the `docker-compose.yaml` +```bash +# Clone LocalAI +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI/examples/flowise + +# start with docker-compose +docker-compose up --pull always + +``` + +## Accessing flowise + +Open http://localhost:3000. + diff --git a/examples/flowise/docker-compose.yaml b/examples/flowise/docker-compose.yaml new file mode 100644 index 00000000..a39269c7 --- /dev/null +++ b/examples/flowise/docker-compose.yaml @@ -0,0 +1,37 @@ +version: '3.6' + +services: + api: + image: quay.io/go-skynet/local-ai:latest + # As initially LocalAI will download the models defined in PRELOAD_MODELS + # you might need to tweak the healthcheck values here according to your network connection. + # Here we give a timespan of 20m to download all the required files. + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] + interval: 1m + timeout: 20m + retries: 20 + build: + context: ../../ + dockerfile: Dockerfile + ports: + - 8080:8080 + environment: + - DEBUG=true + - MODELS_PATH=/models + # You can preload different models here as well. 
+ # See: https://github.com/go-skynet/model-gallery + - 'PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}]' + volumes: + - ./models:/models:cached + command: ["/usr/bin/local-ai" ] + flowise: + depends_on: + api: + condition: service_healthy + image: flowiseai/flowise + ports: + - 3000:3000 + volumes: + - ~/.flowise:/root/.flowise + command: /bin/sh -c "sleep 3; flowise start" \ No newline at end of file From 2f3c3b18676422c93ba142919ece7b495d8238a2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 May 2023 18:34:43 +0200 Subject: [PATCH 079/137] examples: keep old example around (#439) --- examples/README.md | 2 + examples/chatbot-ui-manual/README.md | 48 +++++++++++++++++++ .../chatbot-ui-manual/docker-compose.yaml | 24 ++++++++++ .../chatbot-ui-manual/models/completion.tmpl | 1 + .../models/gpt-3.5-turbo.yaml | 16 +++++++ .../chatbot-ui-manual/models/gpt4all.tmpl | 4 ++ 6 files changed, 95 insertions(+) create mode 100644 examples/chatbot-ui-manual/README.md create mode 100644 examples/chatbot-ui-manual/docker-compose.yaml create mode 100644 examples/chatbot-ui-manual/models/completion.tmpl create mode 100644 examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml create mode 100644 examples/chatbot-ui-manual/models/gpt4all.tmpl diff --git a/examples/README.md b/examples/README.md index e3257cdb..2285ed19 100644 --- a/examples/README.md +++ b/examples/README.md @@ -22,6 +22,8 @@ This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https:/ [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) +There is also a separate example to show how to manually setup a model: [example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui-manual/) + ### Flowise _by [@mudler](https://github.com/mudler)_ diff --git a/examples/chatbot-ui-manual/README.md b/examples/chatbot-ui-manual/README.md new file mode 100644 index 00000000..7cf4bbb2 --- 
/dev/null +++ b/examples/chatbot-ui-manual/README.md @@ -0,0 +1,48 @@ +# chatbot-ui + +Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui). + +![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) + +## Setup + +```bash +# Clone LocalAI +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI/examples/chatbot-ui + +# (optional) Checkout a specific LocalAI tag +# git checkout -b build + +# Download gpt4all-j to models/ +wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j + +# start with docker-compose +docker-compose up -d --pull always +# or you can build the images with: +# docker-compose up -d --build +``` + +## Pointing chatbot-ui to a separately managed LocalAI service + +If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<>` below): +``` +version: '3.6' + +services: + chatgpt: + image: ghcr.io/mckaywrigley/chatbot-ui:main + ports: + - 3000:3000 + environment: + - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' + - 'OPENAI_API_HOST=http://<>:8080' +``` + +Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`. + +## Accessing chatbot-ui + +Open http://localhost:3000 for the Web UI. 
+ diff --git a/examples/chatbot-ui-manual/docker-compose.yaml b/examples/chatbot-ui-manual/docker-compose.yaml new file mode 100644 index 00000000..c7782c34 --- /dev/null +++ b/examples/chatbot-ui-manual/docker-compose.yaml @@ -0,0 +1,24 @@ +version: '3.6' + +services: + api: + image: quay.io/go-skynet/local-ai:latest + build: + context: ../../ + dockerfile: Dockerfile + ports: + - 8080:8080 + environment: + - DEBUG=true + - MODELS_PATH=/models + volumes: + - ./models:/models:cached + command: ["/usr/bin/local-ai" ] + + chatgpt: + image: ghcr.io/mckaywrigley/chatbot-ui:main + ports: + - 3000:3000 + environment: + - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' + - 'OPENAI_API_HOST=http://api:8080' \ No newline at end of file diff --git a/examples/chatbot-ui-manual/models/completion.tmpl b/examples/chatbot-ui-manual/models/completion.tmpl new file mode 100644 index 00000000..9867cfcd --- /dev/null +++ b/examples/chatbot-ui-manual/models/completion.tmpl @@ -0,0 +1 @@ +{{.Input}} \ No newline at end of file diff --git a/examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml b/examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml new file mode 100644 index 00000000..5c192f5d --- /dev/null +++ b/examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml @@ -0,0 +1,16 @@ +name: gpt-3.5-turbo +parameters: + model: ggml-gpt4all-j + top_k: 80 + temperature: 0.2 + top_p: 0.7 +context_size: 1024 +stopwords: +- "HUMAN:" +- "GPT:" +roles: + user: " " + system: " " +template: + completion: completion + chat: gpt4all \ No newline at end of file diff --git a/examples/chatbot-ui-manual/models/gpt4all.tmpl b/examples/chatbot-ui-manual/models/gpt4all.tmpl new file mode 100644 index 00000000..f76b080a --- /dev/null +++ b/examples/chatbot-ui-manual/models/gpt4all.tmpl @@ -0,0 +1,4 @@ +The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 
+### Prompt: +{{.Input}} +### Response: From abd3c621944e2e3b05b9ff6ab397920444abf797 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 30 May 2023 23:03:48 +0200 Subject: [PATCH 080/137] :arrow_up: Update go-skynet/go-llama.cpp (#443) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0eb790d4..992485d0 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18 +GOLLAMA_VERSION?=62b6c079a47d6949c982ed8e684b94bdbf48b41c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6 From f711d35377ed7fefbfba2d413266fa530601993e Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 30 May 2023 23:04:10 +0200 Subject: [PATCH 081/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#442) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 992485d0..ad34746b 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=62b6c079a47d6949c982ed8e684b94bdbf48b41c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b -GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6 +GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 From b8ef9028f1e64073ff070473d56e57117ad3096f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> 
Date: Tue, 30 May 2023 23:04:34 +0200 Subject: [PATCH 082/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 62b6c07 (#441) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1b26b102..d79fdd63 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb - github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f + github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 19c11ae9..b5452689 100644 --- a/go.sum +++ b/go.sum @@ -76,6 +76,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 h1:AbKnkgzk github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f h1:HmXiNF9Sy+34aSjaJ2/JN+goDgbT2XyLjdiG2EOMvaE= github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d h1:daPcVEptc/6arcS/QV4QDCdYiwMGCiiR5rnzUs63WK0= +github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 
fa58965bbcdce626c75642f1777eeede089ddfa3 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 30 May 2023 23:04:53 +0200 Subject: [PATCH 083/137] :arrow_up: Update ggerganov/whisper.cpp (#419) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ad34746b..51377b0c 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 -WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573 +WHISPER_CPP_VERSION?=d7c936b44a80b8070676093fc00622333ba09cd3 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= From 9e3ca6d1a3ec4f37ad5c49b3f2a596f2e73fff72 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Wed, 31 May 2023 00:44:52 +0200 Subject: [PATCH 084/137] :arrow_up: Update nomic-ai/gpt4all (#422) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51377b0c..544a94c1 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=62b6c079a47d6949c982ed8e684b94bdbf48b41c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b +GPT4ALL_VERSION?=337c7fecacfa4ae6779046513ab090687a5b0ef6 GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 From 5623a7c33117134ab96335010258562813d5bfd0 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Wed, 31 
May 2023 00:45:07 +0200 Subject: [PATCH 085/137] :arrow_up: Update go-skynet/go-bert.cpp (#418) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 544a94c1..035bd5a3 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 WHISPER_CPP_VERSION?=d7c936b44a80b8070676093fc00622333ba09cd3 -BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551 +BERT_VERSION?=771b4a08597224b21cff070950ef4f68690e14ad BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= CGO_LDFLAGS?= From 898ca62b5591dce14f95b0d5a093bc6edd41518a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 31 May 2023 08:49:47 +0200 Subject: [PATCH 086/137] fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.7 (#445) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d79fdd63..f4423716 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922 - github.com/onsi/ginkgo/v2 v2.9.5 + github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 github.com/rs/zerolog v1.29.1 diff --git a/go.sum b/go.sum index b5452689..26b3e0ab 100644 --- a/go.sum +++ b/go.sum @@ -141,6 +141,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81c github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= 
github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= +github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= +github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= From 16050a32c760ed8cd68e5a63c44fda05a072b4c7 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 31 May 2023 12:00:01 +0200 Subject: [PATCH 087/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 5f94020 (#435) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index f4423716..b5a559d0 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5 github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 26b3e0ab..df980fdb 100644 --- a/go.sum +++ b/go.sum @@ -139,6 +139,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e9 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang 
v0.0.0-20230528235700-9eb81cb54922 h1:teYhrXxFY28gyBm6QMcYewA0KvLXqkUsgxJcYelaxbg= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5 h1:99cF+V5wk7IInDAEM9HAlSHdLf/xoJR529Wr8lAG5KQ= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From 432b0223f1e0479ef82dadd0f6f667d92f881607 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 31 May 2023 14:59:36 +0200 Subject: [PATCH 088/137] fix(deps): update github.com/donomii/go-rwkv.cpp digest to c43cdf5 (#453) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b5a559d0..192a5b27 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/go-skynet/LocalAI go 1.19 require ( - github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e + github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf diff --git a/go.sum b/go.sum index df980fdb..d1fb0be7 100644 --- a/go.sum +++ b/go.sum @@ -20,6 +20,8 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstK github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= 
github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e h1:YbcLoxAwS0r7otEqU/d8bArubmfEJaG7dZPp0Aa52Io= github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= +github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf h1:upCz8WYdzMeJg0qywUaVaGndY+niuicj5j6V4pvhNS4= +github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= From 28ee18028314e5f52b379abd5d1975d589ba7b51 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 31 May 2023 14:59:47 +0200 Subject: [PATCH 089/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 17b0655 (#454) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 192a5b27..2a3e943c 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index d1fb0be7..60a37977 100644 --- a/go.sum +++ b/go.sum @@ -66,6 +66,8 
@@ github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14/go.mod h1:Rz967+t+aY6S+TBiW/WI8FM/C1WEMM+DamSMtKRxVAM= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb h1:slNlMT8xB6w0QaMroTsqkNzNovUOEkpNpCawB7IjBFY= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb/go.mod h1:SI+oF2+THMydq8Vo4+EzKJaQwtfWOy+lr7yWPP6FR2U= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 h1:LK1DAgJsNMRUWaPpFOnE8XSF70UBybr3zGOvzP8Pdok= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8/go.mod h1:/JbU8HZU+tUOp+1bQAeXf3AyRXm+p3UwhccoJwCTI9A= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From eee3f83d98f69772eaebc718ff72987c88c5dfeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Prud=27homme?= Date: Wed, 31 May 2023 19:51:02 +0200 Subject: [PATCH 090/137] ci: build Docker image variants (#456) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SΓ©bastien Prud'homme --- .github/workflows/image.yml | 79 +++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index eeada322..38eed85a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -15,34 +15,42 @@ concurrency: jobs: docker: + strategy: + matrix: + include: + - build-type: '' + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '' + - build-type: 'cublas' + cuda-major-version: 11 + cuda-minor-version: 
7 + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda11' + - build-type: 'cublas' + cuda-major-version: 12 + cuda-minor-version: 1 + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda12' runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 - - name: Prepare - id: prep - run: | - DOCKER_IMAGE=quay.io/go-skynet/local-ai - VERSION=master - SHORTREF=${GITHUB_SHA::8} - - # If this is git tag, use the tag name as a docker tag - if [[ $GITHUB_REF == refs/tags/* ]]; then - VERSION=${GITHUB_REF#refs/tags/} - fi - TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}" - - # If the VERSION looks like a version number, assume that - # this is the most recent version of the image and also - # tag it 'latest'. - if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then - TAGS="$TAGS,${DOCKER_IMAGE}:latest" - fi - - # Set output parameters. - echo ::set-output name=tags::${TAGS} - echo ::set-output name=docker_image::${DOCKER_IMAGE} + - name: Docker meta + id: meta + uses: docker/metadata-action@v4 + with: + images: quay.io/go-skynet/local-ai + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + type=sha + flavor: | + latest=${{ matrix.tag-latest }} + suffix=${{ matrix.tag-suffix }} - name: Set up QEMU uses: docker/setup-qemu-action@master @@ -60,23 +68,18 @@ jobs: registry: quay.io username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - - name: Build - if: github.event_name != 'pull_request' + + - name: Build and push uses: docker/build-push-action@v4 with: builder: ${{ steps.buildx.outputs.name }} + build-args: | + BUILD_TYPE=${{ matrix.build-type }} + CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }} + CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }} context: . 
file: ./Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.prep.outputs.tags }} - - name: Build PRs - if: github.event_name == 'pull_request' - uses: docker/build-push-action@v4 - with: - builder: ${{ steps.buildx.outputs.name }} - context: . - file: ./Dockerfile - platforms: linux/amd64 - push: false - tags: ${{ steps.prep.outputs.tags }} \ No newline at end of file + platforms: ${{ matrix.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From 8fd4c7afcc5e43fb84f94379cf6e6b12806c15b6 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 31 May 2023 19:55:46 +0200 Subject: [PATCH 091/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to ce6f747 (#450) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 2a3e943c..1f72a09e 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 diff --git a/go.sum b/go.sum index 60a37977..439e98bd 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 h1:IeeVcNaQHdcG+GPg+meOPFvtonvO8p/HBzTrZGjpWZk= 
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f h1:oGTI2SlcA7oGPFsmkS1m8psq3uKNnhhJ/MZ2ZWVZDe0= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From 87a6bbd25187d6af2c28ad212e1b096f30cfa70f Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Wed, 31 May 2023 22:58:44 +0200 Subject: [PATCH 092/137] :arrow_up: Update ggerganov/whisper.cpp (#462) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 035bd5a3..3abe99c3 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=337c7fecacfa4ae6779046513ab090687a5b0ef6 GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 -WHISPER_CPP_VERSION?=d7c936b44a80b8070676093fc00622333ba09cd3 +WHISPER_CPP_VERSION?=ce6f7470649f169027626dc92b3a2e39b4eff463 BERT_VERSION?=771b4a08597224b21cff070950ef4f68690e14ad BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= From 275c1247014a73c348078359655c58ac0c4eed0e Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Wed, 31 May 2023 22:59:02 +0200 Subject: [PATCH 093/137] :arrow_up: Update go-skynet/go-llama.cpp (#458) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile 
b/Makefile index 3abe99c3..6fa334ae 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=62b6c079a47d6949c982ed8e684b94bdbf48b41c +GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=337c7fecacfa4ae6779046513ab090687a5b0ef6 GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c From d70c55231b2c0442d4e72c710a9cd1ce9e4ef593 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Jun 2023 01:04:07 +0200 Subject: [PATCH 094/137] docs: Update README with model gallery url --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae312d4f..45b3d40b 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) | ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) | -See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery). +See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://localai.io/models/). 
## News From 451e8034448257390dfc2cde3bc6f9cc24c44db4 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 01:05:24 +0200 Subject: [PATCH 095/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 10caf37 (#455) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1f72a09e..0d8cba0d 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 - github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d + github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 439e98bd..46500237 100644 --- a/go.sum +++ b/go.sum @@ -84,6 +84,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f h1:HmXiNF9S github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d h1:daPcVEptc/6arcS/QV4QDCdYiwMGCiiR5rnzUs63WK0= github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= +github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73 h1:swwsrYpPYOsyGFrX/0nhaYa93aHH6I61HpSJpQkN1tY= +github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig 
v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 7282668da12e35971e446eacab404ae7a1c34fdf Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 09:59:11 +0200 Subject: [PATCH 096/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 3f7436e (#466) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0d8cba0d..621dd104 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 diff --git a/go.sum b/go.sum index 46500237..f7d05935 100644 --- a/go.sum +++ b/go.sum @@ -34,6 +34,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f h1:oGTI2SlcA7oGPFsmkS1m8psq3uKNnhhJ/MZ2ZWVZDe0= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 h1:TD7v8FnwWCWlOsrkpnumsbxsflyhTI3rSm2HInqqSAI= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= 
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From 3ba07a5928b2246fd6e1d6854a2fe9953739ae80 Mon Sep 17 00:00:00 2001 From: Pavel Zloi Date: Thu, 1 Jun 2023 13:00:06 +0300 Subject: [PATCH 097/137] feat: add LangChainGo Huggingface backend (#446) Co-authored-by: Ettore Di Giacinto --- .dockerignore | 2 + .gitignore | 1 + api/prediction.go | 18 +++++ examples/langchain-huggingface/README.md | 68 +++++++++++++++++++ .../langchain-huggingface/docker-compose.yml | 15 ++++ .../models/completion.tmpl | 1 + .../models/gpt-3.5-turbo.yaml | 17 +++++ .../langchain-huggingface/models/gpt4all.tmpl | 4 ++ go.mod | 1 + go.sum | 2 + pkg/langchain/huggingface.go | 47 +++++++++++++ pkg/langchain/langchain.go | 57 ++++++++++++++++ pkg/model/initializers.go | 8 +++ 13 files changed, 241 insertions(+) create mode 100644 examples/langchain-huggingface/README.md create mode 100644 examples/langchain-huggingface/docker-compose.yml create mode 100644 examples/langchain-huggingface/models/completion.tmpl create mode 100644 examples/langchain-huggingface/models/gpt-3.5-turbo.yaml create mode 100644 examples/langchain-huggingface/models/gpt4all.tmpl create mode 100644 pkg/langchain/huggingface.go create mode 100644 pkg/langchain/langchain.go diff --git a/.dockerignore b/.dockerignore index 41478502..e73b1f9d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,5 @@ +.git +.idea models examples/chatbot-ui/models examples/rwkv/models diff --git a/.gitignore b/.gitignore index 878047ee..20215af2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ release/ # just in case .DS_Store +.idea diff --git a/api/prediction.go b/api/prediction.go index 4ae1b69a..8aad4228 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -9,6 +9,7 @@ import ( "sync" "github.com/donomii/go-rwkv.cpp" + 
"github.com/go-skynet/LocalAI/pkg/langchain" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/stablediffusion" "github.com/go-skynet/bloomz.cpp" @@ -494,6 +495,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback model.SetTokenCallback(nil) return str, er } + case *langchain.HuggingFace: + fn = func() (string, error) { + + // Generate the prediction using the language model + predictOptions := []langchain.PredictOption{ + langchain.SetModel(c.Model), + langchain.SetMaxTokens(c.Maxtokens), + langchain.SetTemperature(c.Temperature), + langchain.SetStopWords(c.StopWords), + } + + pred, er := model.PredictHuggingFace(s, predictOptions...) + if er != nil { + return "", er + } + return pred.Completion, nil + } } return func() (string, error) { diff --git a/examples/langchain-huggingface/README.md b/examples/langchain-huggingface/README.md new file mode 100644 index 00000000..23fdcd32 --- /dev/null +++ b/examples/langchain-huggingface/README.md @@ -0,0 +1,68 @@ +# Data query example + +Example of integration with HuggingFace Inference API with help of [langchaingo](https://github.com/tmc/langchaingo). + +## Setup + +Download the LocalAI and start the API: + +```bash +# Clone LocalAI +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI/examples/langchain-huggingface + +docker-compose up -d +``` + +Node: Ensure you've set `HUGGINGFACEHUB_API_TOKEN` environment variable, you can generate it +on [Settings / Access Tokens](https://huggingface.co/settings/tokens) page of HuggingFace site. 
+ +This is an example `.env` file for LocalAI: + +```ini +MODELS_PATH=/models +CONTEXT_SIZE=512 +HUGGINGFACEHUB_API_TOKEN=hg_123456 +``` + +## Using remote models + +Now you can use any remote models available via HuggingFace API, for example let's enable using of +[gpt2](https://huggingface.co/gpt2) model in `gpt-3.5-turbo.yaml` config: + +```yml +name: gpt-3.5-turbo +parameters: + model: gpt2 + top_k: 80 + temperature: 0.2 + top_p: 0.7 +context_size: 1024 +backend: "langchain-huggingface" +stopwords: +- "HUMAN:" +- "GPT:" +roles: + user: " " + system: " " +template: + completion: completion + chat: gpt4all +``` + +Here is you can see in field `parameters.model` equal `gpt2` and `backend` equal `langchain-huggingface`. + +## How to use + +```shell +# Now API is accessible at localhost:8080 +curl http://localhost:8080/v1/models +# {"object":"list","data":[{"id":"gpt-3.5-turbo","object":"model"}]} + +curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-3.5-turbo", + "prompt": "A long time ago in a galaxy far, far away", + "temperature": 0.7 +}' +``` \ No newline at end of file diff --git a/examples/langchain-huggingface/docker-compose.yml b/examples/langchain-huggingface/docker-compose.yml new file mode 100644 index 00000000..96ef540e --- /dev/null +++ b/examples/langchain-huggingface/docker-compose.yml @@ -0,0 +1,15 @@ +version: '3.6' + +services: + api: + image: quay.io/go-skynet/local-ai:latest + build: + context: ../../ + dockerfile: Dockerfile + ports: + - 8080:8080 + env_file: + - ../../.env + volumes: + - ./models:/models:cached + command: ["/usr/bin/local-ai"] diff --git a/examples/langchain-huggingface/models/completion.tmpl b/examples/langchain-huggingface/models/completion.tmpl new file mode 100644 index 00000000..1e04a465 --- /dev/null +++ b/examples/langchain-huggingface/models/completion.tmpl @@ -0,0 +1 @@ +{{.Input}} diff --git a/examples/langchain-huggingface/models/gpt-3.5-turbo.yaml 
b/examples/langchain-huggingface/models/gpt-3.5-turbo.yaml new file mode 100644 index 00000000..76e9ab18 --- /dev/null +++ b/examples/langchain-huggingface/models/gpt-3.5-turbo.yaml @@ -0,0 +1,17 @@ +name: gpt-3.5-turbo +parameters: + model: gpt2 + top_k: 80 + temperature: 0.2 + top_p: 0.7 +context_size: 1024 +backend: "langchain-huggingface" +stopwords: +- "HUMAN:" +- "GPT:" +roles: + user: " " + system: " " +template: + completion: completion + chat: gpt4all diff --git a/examples/langchain-huggingface/models/gpt4all.tmpl b/examples/langchain-huggingface/models/gpt4all.tmpl new file mode 100644 index 00000000..f76b080a --- /dev/null +++ b/examples/langchain-huggingface/models/gpt4all.tmpl @@ -0,0 +1,4 @@ +The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. +### Prompt: +{{.Input}} +### Response: diff --git a/go.mod b/go.mod index 621dd104..7ab82410 100644 --- a/go.mod +++ b/go.mod @@ -59,6 +59,7 @@ require ( github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect github.com/tinylib/msgp v1.1.8 // indirect + github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect diff --git a/go.sum b/go.sum index f7d05935..872940b2 100644 --- a/go.sum +++ b/go.sum @@ -192,6 +192,8 @@ github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKik github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= +github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 
h1:IVlfKPZzq3W1G+CkhZgN5VjmHnAeB3YqEvxyNPPCZXY= +github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= diff --git a/pkg/langchain/huggingface.go b/pkg/langchain/huggingface.go new file mode 100644 index 00000000..38c55cd5 --- /dev/null +++ b/pkg/langchain/huggingface.go @@ -0,0 +1,47 @@ +package langchain + +import ( + "context" + + "github.com/tmc/langchaingo/llms" + "github.com/tmc/langchaingo/llms/huggingface" +) + +type HuggingFace struct { + modelPath string +} + +func NewHuggingFace(repoId string) (*HuggingFace, error) { + return &HuggingFace{ + modelPath: repoId, + }, nil +} + +func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*Predict, error) { + po := NewPredictOptions(opts...) + + // Init client + llm, err := huggingface.New() + if err != nil { + return nil, err + } + + // Convert from LocalAI to LangChainGo format of options + co := []llms.CallOption{ + llms.WithModel(po.Model), + llms.WithMaxTokens(po.MaxTokens), + llms.WithTemperature(po.Temperature), + llms.WithStopWords(po.StopWords), + } + + // Call Inference API + ctx := context.Background() + completion, err := llm.Call(ctx, text, co...) + if err != nil { + return nil, err + } + + return &Predict{ + Completion: completion, + }, nil +} diff --git a/pkg/langchain/langchain.go b/pkg/langchain/langchain.go new file mode 100644 index 00000000..737bc4b5 --- /dev/null +++ b/pkg/langchain/langchain.go @@ -0,0 +1,57 @@ +package langchain + +type PredictOptions struct { + Model string `json:"model"` + // MaxTokens is the maximum number of tokens to generate. + MaxTokens int `json:"max_tokens"` + // Temperature is the temperature for sampling, between 0 and 1. 
+ Temperature float64 `json:"temperature"` + // StopWords is a list of words to stop on. + StopWords []string `json:"stop_words"` +} + +type PredictOption func(p *PredictOptions) + +var DefaultOptions = PredictOptions{ + Model: "gpt2", + MaxTokens: 200, + Temperature: 0.96, + StopWords: nil, +} + +type Predict struct { + Completion string +} + +func SetModel(model string) PredictOption { + return func(o *PredictOptions) { + o.Model = model + } +} + +func SetTemperature(temperature float64) PredictOption { + return func(o *PredictOptions) { + o.Temperature = temperature + } +} + +func SetMaxTokens(maxTokens int) PredictOption { + return func(o *PredictOptions) { + o.MaxTokens = maxTokens + } +} + +func SetStopWords(stopWords []string) PredictOption { + return func(o *PredictOptions) { + o.StopWords = stopWords + } +} + +// NewPredictOptions Create a new PredictOptions object with the given options. +func NewPredictOptions(opts ...PredictOption) PredictOptions { + p := DefaultOptions + for _, opt := range opts { + opt(&p) + } + return p +} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index dc593a7c..518e59f1 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -7,6 +7,7 @@ import ( rwkv "github.com/donomii/go-rwkv.cpp" whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + "github.com/go-skynet/LocalAI/pkg/langchain" "github.com/go-skynet/LocalAI/pkg/stablediffusion" bloomz "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" @@ -36,6 +37,7 @@ const ( RwkvBackend = "rwkv" WhisperBackend = "whisper" StableDiffusionBackend = "stablediffusion" + LCHuggingFaceBackend = "langchain-huggingface" ) var backends []string = []string{ @@ -100,6 +102,10 @@ var whisperModel = func(modelFile string) (interface{}, error) { return whisper.New(modelFile) } +var lcHuggingFace = func(repoId string) (interface{}, error) { + return langchain.NewHuggingFace(repoId) +} + func llamaLM(opts ...llama.ModelOption) 
func(string) (interface{}, error) { return func(s string) (interface{}, error) { return llama.New(s, opts...) @@ -159,6 +165,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla return ml.LoadModel(modelFile, rwkvLM(filepath.Join(ml.ModelPath, modelFile+tokenizerSuffix), threads)) case WhisperBackend: return ml.LoadModel(modelFile, whisperModel) + case LCHuggingFaceBackend: + return ml.LoadModel(modelFile, lcHuggingFace) default: return nil, fmt.Errorf("backend unsupported: %s", backendString) } From c8a4a4f4e9b515e839f36d3c872eba3f5d3c1c45 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Fri, 2 Jun 2023 00:20:45 +1000 Subject: [PATCH 098/137] feat: Add new test cases for LoadConfigs (#447) Signed-off-by: Aisuko --- Makefile | 2 +- api/config.go | 13 ++++++++++--- api/config_test.go | 29 ++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 6fa334ae..8c79251d 100644 --- a/Makefile +++ b/Makefile @@ -235,7 +235,7 @@ test-models/testmodel: test: prepare test-models/testmodel cp tests/models_fixtures/* test-models - C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flakeAttempts 5 -v -r ./api ./pkg + C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./api ./pkg ## Help: help: ## Show this help. 
diff --git a/api/config.go b/api/config.go index 42aecbe8..5204cea7 100644 --- a/api/config.go +++ b/api/config.go @@ -3,7 +3,7 @@ package api import ( "encoding/json" "fmt" - "io/ioutil" + "io/fs" "os" "path/filepath" "strings" @@ -130,11 +130,18 @@ func (cm ConfigMerger) ListConfigs() []string { func (cm ConfigMerger) LoadConfigs(path string) error { cm.Lock() defer cm.Unlock() - files, err := ioutil.ReadDir(path) + entries, err := os.ReadDir(path) if err != nil { return err } - + files := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + return err + } + files = append(files, info) + } for _, file := range files { // Skip templates, YAML and .keep files if !strings.Contains(file.Name(), ".yaml") { diff --git a/api/config_test.go b/api/config_test.go index 9950f803..626b90be 100644 --- a/api/config_test.go +++ b/api/config_test.go @@ -3,6 +3,7 @@ package api import ( "os" + "github.com/go-skynet/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) @@ -20,8 +21,34 @@ var _ = Describe("Test cases for config related functions", func() { Expect(err).To(BeNil()) Expect(config).ToNot(BeNil()) // two configs in config.yaml - Expect(len(config)).To(Equal(2)) + Expect(config[0].Name).To(Equal("list1")) + Expect(config[1].Name).To(Equal("list2")) }) + It("Test LoadConfigs", func() { + cm := NewConfigMerger() + options := newOptions() + modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH")) + WithModelLoader(modelLoader)(options) + + err := cm.LoadConfigs(options.loader.ModelPath) + Expect(err).To(BeNil()) + Expect(cm.configs).ToNot(BeNil()) + + // config should includes gpt4all models's api.config + Expect(cm.configs).To(HaveKey("gpt4all")) + + // config should includes gpt2 models's api.config + Expect(cm.configs).To(HaveKey("gpt4all-2")) + + // config should includes text-embedding-ada-002 models's api.config + Expect(cm.configs).To(HaveKey("text-embedding-ada-002")) + + // config should includes rwkv_test models's api.config + Expect(cm.configs).To(HaveKey("rwkv_test")) + + // config should includes whisper-1 models's api.config + Expect(cm.configs).To(HaveKey("whisper-1")) + }) }) }) From c5cb2ff2687f7539696b710b5d97ea07e5899d77 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:21:13 +0200 Subject: [PATCH 099/137] :arrow_up: Update go-skynet/go-bert.cpp (#463) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8c79251d..836ef569 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 WHISPER_CPP_VERSION?=ce6f7470649f169027626dc92b3a2e39b4eff463 -BERT_VERSION?=771b4a08597224b21cff070950ef4f68690e14ad 
+BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= CGO_LDFLAGS?= From 07cee3f6efe75ebfb820d12f8a4018e0fe8019c1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:21:32 +0200 Subject: [PATCH 100/137] fix(deps): update github.com/donomii/go-rwkv.cpp digest to 3b28b09 (#467) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 7ab82410..eaacf3e2 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/go-skynet/LocalAI go 1.19 require ( - github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf + github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf @@ -22,6 +22,7 @@ require ( github.com/rs/zerolog v1.29.1 github.com/sashabaranov/go-openai v1.9.5 github.com/swaggo/swag v1.16.1 + github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 github.com/urfave/cli/v2 v2.25.3 github.com/valyala/fasthttp v1.47.0 gopkg.in/yaml.v2 v2.4.0 @@ -59,7 +60,6 @@ require ( github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect github.com/tinylib/msgp v1.1.8 // indirect - github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect diff --git a/go.sum b/go.sum index 872940b2..f1cd37f0 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e h1:YbcLoxAwS0r 
github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf h1:upCz8WYdzMeJg0qywUaVaGndY+niuicj5j6V4pvhNS4= github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= +github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc h1:RCGGh/zw+K09sjCIYHUV7lFenxONml+LS02RdN+AkwI= +github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= From 5c018c04370b6c67de0c3bad8c55418d547a9b88 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:23:16 +0200 Subject: [PATCH 101/137] :arrow_up: Update ggerganov/whisper.cpp (#468) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 836ef569..2d703c8e 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=337c7fecacfa4ae6779046513ab090687a5b0ef6 GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 -WHISPER_CPP_VERSION?=ce6f7470649f169027626dc92b3a2e39b4eff463 +WHISPER_CPP_VERSION?=3f7436e8a09611931709b29f5c507245c8c1d7a4 BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= From 42d753846eacd253a01187f4a950f26d96d5de01 Mon Sep 17 
00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:23:34 +0200 Subject: [PATCH 102/137] fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 5b9e59b (#469) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index eaacf3e2..44a8d340 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc - github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 diff --git a/go.sum b/go.sum index f1cd37f0..fef1ca4a 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230531071314-ce6f7470649f/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 h1:TD7v8FnwWCWlOsrkpnumsbxsflyhTI3rSm2HInqqSAI= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd h1:os3FeYEIB4j5m5QlbFC3HkVcaAmLxNXz48uIfQAexm0= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 
h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= From 78ad4813dfe056e8ee3ff4c537ac87c901ccd9a0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Jun 2023 23:38:52 +0200 Subject: [PATCH 103/137] feat: Update gpt4all, support multiple implementations in runtime (#472) Signed-off-by: mudler --- .gitignore | 3 +++ Makefile | 34 ++++++++++++-------------- api/api.go | 7 ++++++ api/api_test.go | 2 +- api/backend_assets.go | 27 +++++++++++++++++++++ api/options.go | 16 ++++++++++++ assets.go | 6 +++++ go.mod | 2 +- go.sum | 2 ++ main.go | 8 ++++++ pkg/assets/extract.go | 51 +++++++++++++++++++++++++++++++++++++++ pkg/model/initializers.go | 13 +++------- 12 files changed, 142 insertions(+), 29 deletions(-) create mode 100644 api/backend_assets.go create mode 100644 assets.go create mode 100644 pkg/assets/extract.go diff --git a/.gitignore b/.gitignore index 20215af2..d1a7f148 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,6 @@ release/ # just in case .DS_Store .idea + +# Generated during build +backend-assets/ \ No newline at end of file diff --git a/Makefile b/Makefile index 2d703c8e..0f18db07 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=337c7fecacfa4ae6779046513ab090687a5b0ef6 +GPT4ALL_VERSION?=022f1cabe7dd2c911936b37510582f279069ba1e GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 @@ -63,22 +63,13 @@ gpt4all: git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 
- @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + - @find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + - @find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/regex_escape/gpt4allregex_escape/g' {} + - mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h + @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.cpp" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + + @find 
./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + + ## BERT embeddings go-bert: @@ -124,6 +115,12 @@ bloomz/libbloomz.a: bloomz go-bert/libgobert.a: go-bert $(MAKE) -C go-bert libgobert.a +backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a + mkdir -p backend-assets/gpt4all + @cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true + @cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true + @cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true + gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all $(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a @@ -188,7 +185,7 @@ rebuild: ## Rebuilds the project $(MAKE) -C bloomz clean $(MAKE) build -prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building +prepare: prepare-sources backend-assets/gpt4all $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building clean: ## Remove build related file rm -fr ./go-llama @@ -196,6 +193,7 @@ clean: ## Remove build related file rm -rf ./go-gpt2 rm -rf ./go-stable-diffusion rm -rf ./go-ggml-transformers + rm -rf ./backend-assets rm -rf ./go-rwkv rm -rf ./go-bert rm -rf ./bloomz diff --git a/api/api.go b/api/api.go index fb3addca..701ef401 100644 --- a/api/api.go +++ b/api/api.go @@ -66,6 +66,13 @@ func App(opts ...AppOption) (*fiber.App, error) { log.Debug().Msgf("Model: %s (config: %+v)", v, cfg) } } + + if options.assetsDestination != "" { + if err := PrepareBackendAssets(options.backendAssets, options.assetsDestination); err != nil { + log.Warn().Msgf("Failed extracting backend assets files: 
%s (might be required for some backends to work properly, like gpt4all)", err) + } + } + // Default middleware config app.Use(recover.New()) diff --git a/api/api_test.go b/api/api_test.go index 2fa7e43b..e1d3f712 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -257,7 +257,7 @@ var _ = Describe("API test", func() { It("returns errors", func() { _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:")) + Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:")) }) It("transcribes audio", func() { if runtime.GOOS != "linux" { diff --git a/api/backend_assets.go b/api/backend_assets.go new file mode 100644 index 00000000..5a3ed76e --- /dev/null +++ b/api/backend_assets.go @@ -0,0 +1,27 @@ +package api + +import ( + "embed" + "os" + "path/filepath" + + "github.com/go-skynet/LocalAI/pkg/assets" + "github.com/rs/zerolog/log" +) + +func PrepareBackendAssets(backendAssets embed.FS, dst string) error { + + // Extract files from the embedded FS + err := assets.ExtractFiles(backendAssets, dst) + if err != nil { + return err + } + + // Set GPT4ALL libs where we extracted the files + // https://github.com/nomic-ai/gpt4all/commit/27e80e1d10985490c9fd4214e4bf458cfcf70896 + gpt4alldir := filepath.Join(dst, "backend-assets", "gpt4all") + os.Setenv("GPT4ALL_IMPLEMENTATIONS_PATH", gpt4alldir) + log.Debug().Msgf("GPT4ALL_IMPLEMENTATIONS_PATH: %s", gpt4alldir) + + return nil +} diff --git a/api/options.go b/api/options.go index ea7497c7..2049f42c 100644 --- a/api/options.go +++ b/api/options.go @@ -2,6 +2,7 @@ package api import ( "context" + "embed" model "github.com/go-skynet/LocalAI/pkg/model" ) @@ -18,6 +19,9 @@ type Option struct { 
preloadJSONModels string preloadModelsFromPath string corsAllowOrigins string + + backendAssets embed.FS + assetsDestination string } type AppOption func(*Option) @@ -49,6 +53,18 @@ func WithCorsAllowOrigins(b string) AppOption { } } +func WithBackendAssetsOutput(out string) AppOption { + return func(o *Option) { + o.assetsDestination = out + } +} + +func WithBackendAssets(f embed.FS) AppOption { + return func(o *Option) { + o.backendAssets = f + } +} + func WithContext(ctx context.Context) AppOption { return func(o *Option) { o.context = ctx diff --git a/assets.go b/assets.go new file mode 100644 index 00000000..1acff154 --- /dev/null +++ b/assets.go @@ -0,0 +1,6 @@ +package main + +import "embed" + +//go:embed backend-assets/* +var backendAssets embed.FS diff --git a/go.mod b/go.mod index 44a8d340..de431395 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index fef1ca4a..fc1d84c3 100644 --- a/go.sum +++ b/go.sum @@ -155,6 +155,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81c github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5 h1:99cF+V5wk7IInDAEM9HAlSHdLf/xoJR529Wr8lAG5KQ= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang 
v0.0.0-20230601151908-5175db27813c h1:KXYqUH6bdYbxnF67l8wayctaCZ4BQJQOsUyNke7HC0A= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= diff --git a/main.go b/main.go index bdf95db7..6c0f6af7 100644 --- a/main.go +++ b/main.go @@ -80,6 +80,12 @@ func main() { EnvVars: []string{"IMAGE_PATH"}, Value: "", }, + &cli.StringFlag{ + Name: "backend-assets-path", + DefaultText: "Path used to extract libraries that are required by some of the backends in runtime.", + EnvVars: []string{"BACKEND_ASSETS_PATH"}, + Value: "/tmp/localai/backend_data", + }, &cli.IntFlag{ Name: "context-size", DefaultText: "Default context size of the model", @@ -124,6 +130,8 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. 
api.WithCors(ctx.Bool("cors")), api.WithCorsAllowOrigins(ctx.String("cors-allow-origins")), api.WithThreads(ctx.Int("threads")), + api.WithBackendAssets(backendAssets), + api.WithBackendAssetsOutput(ctx.String("backend-assets-path")), api.WithUploadLimitMB(ctx.Int("upload-limit"))) if err != nil { return err diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go new file mode 100644 index 00000000..fc208377 --- /dev/null +++ b/pkg/assets/extract.go @@ -0,0 +1,51 @@ +package assets + +import ( + "embed" + "fmt" + "io/fs" + "os" + "path/filepath" +) + +func ExtractFiles(content embed.FS, extractDir string) error { + // Create the target directory if it doesn't exist + err := os.MkdirAll(extractDir, 0755) + if err != nil { + return fmt.Errorf("failed to create directory: %v", err) + } + + // Walk through the embedded FS and extract files + err = fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + // Reconstruct the directory structure in the target directory + targetFile := filepath.Join(extractDir, path) + if d.IsDir() { + // Create the directory in the target directory + err := os.MkdirAll(targetFile, 0755) + if err != nil { + return fmt.Errorf("failed to create directory: %v", err) + } + return nil + } + + // Read the file from the embedded FS + fileData, err := content.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read file: %v", err) + } + + // Create the file in the target directory + err = os.WriteFile(targetFile, fileData, 0644) + if err != nil { + return fmt.Errorf("failed to write file: %v", err) + } + + return nil + }) + + return err +} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 518e59f1..b2c23b77 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -33,6 +33,7 @@ const ( Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" + Gpt4All = "gpt4all" BertEmbeddingsBackend = 
"bert-embeddings" RwkvBackend = "rwkv" WhisperBackend = "whisper" @@ -42,9 +43,7 @@ const ( var backends []string = []string{ LlamaBackend, - Gpt4AllLlamaBackend, - Gpt4AllMptBackend, - Gpt4AllJBackend, + Gpt4All, RwkvBackend, GPTNeoXBackend, WhisperBackend, @@ -153,12 +152,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla return ml.LoadModel(modelFile, stableDiffusion) case StarcoderBackend: return ml.LoadModel(modelFile, starCoder) - case Gpt4AllLlamaBackend: - return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))) - case Gpt4AllMptBackend: - return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))) - case Gpt4AllJBackend: - return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))) + case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All: + return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)))) case BertEmbeddingsBackend: return ml.LoadModel(modelFile, bertEmbeddings) case RwkvBackend: From 4422ca2235f48188f80725f678e8bb3d2d1dcf56 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:43:15 +0200 Subject: [PATCH 104/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#459) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f18db07..76742b4d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=022f1cabe7dd2c911936b37510582f279069ba1e -GOGGMLTRANSFORMERS_VERSION?=13ccc22621bb21afecd38675a2b043498e2e756c +GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c 
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 WHISPER_CPP_VERSION?=3f7436e8a09611931709b29f5c507245c8c1d7a4 From a63d6f6364517e6076464de2c915c8469f3ebb8a Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:44:05 +0200 Subject: [PATCH 105/137] :arrow_up: Update ggerganov/whisper.cpp (#473) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 76742b4d..a39281a7 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ GPT4ALL_VERSION?=022f1cabe7dd2c911936b37510582f279069ba1e GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 -WHISPER_CPP_VERSION?=3f7436e8a09611931709b29f5c507245c8c1d7a4 +WHISPER_CPP_VERSION?=5b9e59bc07dd76320354f2af6be29f16dbcb21e7 BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f BUILD_TYPE?= From 1b766ab89ce1b2dccd75f3eb9706b21781c2cc36 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:44:17 +0200 Subject: [PATCH 106/137] fix(deps): update module github.com/urfave/cli/v2 to v2.25.5 (#399) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index de431395..93712490 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/sashabaranov/go-openai v1.9.5 github.com/swaggo/swag v1.16.1 github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 - github.com/urfave/cli/v2 v2.25.3 + github.com/urfave/cli/v2 v2.25.5 github.com/valyala/fasthttp v1.47.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 
fc1d84c3..77e2acf3 100644 --- a/go.sum +++ b/go.sum @@ -202,6 +202,8 @@ github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 h1:IVlfKPZzq3W1G+C github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= +github.com/urfave/cli/v2 v2.25.5 h1:d0NIAyhh5shGscroL7ek/Ya9QYQE0KNabJgiUinIQkc= +github.com/urfave/cli/v2 v2.25.5/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c= From 5a8dd409183ab905fd053a4012a495e8110148b0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Jun 2023 08:53:45 +0200 Subject: [PATCH 107/137] feat: Enable stablediffusion by default in container images (#474) Signed-off-by: mudler --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f7fdbb8e..beee4ea1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG GO_VERSION=1.20 FROM golang:$GO_VERSION as builder ARG BUILD_TYPE= -ARG GO_TAGS= +ARG GO_TAGS=stablediffusion ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 @@ -45,7 +45,7 @@ RUN make build FROM golang:$GO_VERSION ARG BUILD_TYPE= -ARG GO_TAGS= +ARG GO_TAGS=stablediffusion ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 From 19f92d7d55950ccb9c0f34976f7ba9d0a15c24bc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Jun 2023 08:53:57 +0200 Subject: [PATCH 108/137] fix: Bump and fix rwkv build (#475) --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a39281a7..b4b53bfc 100644 --- a/Makefile +++ 
b/Makefile @@ -7,8 +7,8 @@ GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=022f1cabe7dd2c911936b37510582f279069ba1e GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c -RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp -RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2 +RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp +RWKV_VERSION?=f25c89f8e55a67d57c01661a16abeed1b1c25016 WHISPER_CPP_VERSION?=5b9e59bc07dd76320354f2af6be29f16dbcb21e7 BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f @@ -96,7 +96,7 @@ go-rwkv: @find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + go-rwkv/librwkv.a: go-rwkv - cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a .. + cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. 
## bloomz bloomz: From e875c1f64a7320417c12a70767366c5920b5c217 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Jun 2023 09:57:01 +0200 Subject: [PATCH 109/137] fix: fix the make run target (#476) Signed-off-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b4b53bfc..4dd3aed0 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ generic-build: ## Build the project using generic ## Run run: prepare ## run local-ai - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./ test-models/testmodel: mkdir test-models From 695f3e57588f8f7d5e25b18b800a6b815f1d2759 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 13:32:27 +0200 Subject: [PATCH 110/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 031d714 (#464) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 93712490..df867694 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 77e2acf3..5d64fd06 100644 --- a/go.sum +++ b/go.sum @@ -157,6 +157,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f9402 
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230531011104-5f940208e4f5/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c h1:KXYqUH6bdYbxnF67l8wayctaCZ4BQJQOsUyNke7HC0A= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd h1:VTPLKWrmiwYnSHfZh2KHqwSbMeM3D50J6VmDznyY3Ak= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From 51a1a721b38b07936adb5cb121ec50d29fd97239 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 19:31:21 +0200 Subject: [PATCH 111/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to be9f6ad (#477) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index df867694..7d820031 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342 github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 
5d64fd06..6dacc0c4 100644 --- a/go.sum +++ b/go.sum @@ -159,6 +159,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601151908-5175db27813c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd h1:VTPLKWrmiwYnSHfZh2KHqwSbMeM3D50J6VmDznyY3Ak= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342 h1:Nca3BDITw9yrhMksPL5VKpj+nOUmDXTy7qB7tHJy0R8= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From 96794851b3a638b37ca9041efe03df1cae67bd5b Mon Sep 17 00:00:00 2001 From: Samuel Maynard Date: Fri, 2 Jun 2023 17:27:03 -0500 Subject: [PATCH 112/137] feat: add support for `Stream: true` to completionEndpoint (#465) --- api/openai.go | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/api/openai.go b/api/openai.go index b97b4e56..cb935101 100644 --- a/api/openai.go +++ b/api/openai.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "encoding/base64" + "errors" "encoding/json" "fmt" "io" @@ -143,13 +144,29 @@ func defaultRequest(modelFile string) OpenAIRequest { // https://platform.openai.com/docs/api-reference/completions func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { + process := func(s string, req *OpenAIRequest, config *Config, loader 
*model.ModelLoader, responses chan OpenAIResponse) { + ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool { + resp := OpenAIResponse{ + Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []Choice{{Text: s}}, + Object: "text_completion", + } + log.Debug().Msgf("Sending goroutine: %s", s) + responses <- resp + return true + }) + close(responses) + } + + return func(c *fiber.Ctx) error { model, input, err := readInput(c, o.loader, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } + log.Debug().Msgf("`input`: %+v", input) + config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -157,12 +174,67 @@ func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { log.Debug().Msgf("Parameter Config: %+v", config) + if input.Stream { + log.Debug().Msgf("Stream request received") + c.Context().SetContentType("text/event-stream") + //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) + //c.Set("Content-Type", "text/event-stream") + c.Set("Cache-Control", "no-cache") + c.Set("Connection", "keep-alive") + c.Set("Transfer-Encoding", "chunked") + } + templateFile := config.Model if config.TemplateConfig.Completion != "" { templateFile = config.TemplateConfig.Completion } + if input.Stream { + if (len(config.PromptStrings) > 1) { + return errors.New("cannot handle more than 1 `PromptStrings` when `Stream`ing") + } + + predInput := config.PromptStrings[0] + + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + templatedInput, err := o.loader.TemplatePrefix(templateFile, struct { + Input string + }{Input: predInput}) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } + + responses := make(chan 
OpenAIResponse) + + go process(predInput, input, config, o.loader, responses) + + c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { + + for ev := range responses { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + enc.Encode(ev) + + log.Debug().Msgf("Sending chunk: %s", buf.String()) + fmt.Fprintf(w, "data: %v\n", buf.String()) + w.Flush() + } + + resp := &OpenAIResponse{ + Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. + Choices: []Choice{{FinishReason: "stop"}}, + } + respData, _ := json.Marshal(resp) + + w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) + w.WriteString("data: [DONE]\n\n") + w.Flush() + })) + return nil + } + var result []Choice for _, i := range config.PromptStrings { // A model can have a "file.bin.tmpl" file associated with a prompt template prefix From aa6cdf16c81749e7ec12ad05b5e7207502ab2f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Prud=27homme?= Date: Sat, 3 Jun 2023 14:25:30 +0200 Subject: [PATCH 113/137] fix: display help with correct default values (#481) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SΓ©bastien Prud'homme --- main.go | 74 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/main.go b/main.go index 6c0f6af7..16d5e314 100644 --- a/main.go +++ b/main.go @@ -42,61 +42,61 @@ func main() { EnvVars: []string{"CORS_ALLOW_ORIGINS"}, }, &cli.IntFlag{ - Name: "threads", - DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", - EnvVars: []string{"THREADS"}, - Value: 4, + Name: "threads", + Usage: "Number of threads used for parallel computation. 
Usage of the number of physical cores in the system is suggested.", + EnvVars: []string{"THREADS"}, + Value: 4, }, &cli.StringFlag{ - Name: "models-path", - DefaultText: "Path containing models used for inferencing", - EnvVars: []string{"MODELS_PATH"}, - Value: filepath.Join(path, "models"), + Name: "models-path", + Usage: "Path containing models used for inferencing", + EnvVars: []string{"MODELS_PATH"}, + Value: filepath.Join(path, "models"), }, &cli.StringFlag{ - Name: "preload-models", - DefaultText: "A List of models to apply in JSON at start", - EnvVars: []string{"PRELOAD_MODELS"}, + Name: "preload-models", + Usage: "A List of models to apply in JSON at start", + EnvVars: []string{"PRELOAD_MODELS"}, }, &cli.StringFlag{ - Name: "preload-models-config", - DefaultText: "A List of models to apply at startup. Path to a YAML config file", - EnvVars: []string{"PRELOAD_MODELS_CONFIG"}, + Name: "preload-models-config", + Usage: "A List of models to apply at startup. Path to a YAML config file", + EnvVars: []string{"PRELOAD_MODELS_CONFIG"}, }, &cli.StringFlag{ - Name: "config-file", - DefaultText: "Config file", - EnvVars: []string{"CONFIG_FILE"}, + Name: "config-file", + Usage: "Config file", + EnvVars: []string{"CONFIG_FILE"}, }, &cli.StringFlag{ - Name: "address", - DefaultText: "Bind address for the API server.", - EnvVars: []string{"ADDRESS"}, - Value: ":8080", + Name: "address", + Usage: "Bind address for the API server.", + EnvVars: []string{"ADDRESS"}, + Value: ":8080", }, &cli.StringFlag{ - Name: "image-path", - DefaultText: "Image directory", - EnvVars: []string{"IMAGE_PATH"}, - Value: "", + Name: "image-path", + Usage: "Image directory", + EnvVars: []string{"IMAGE_PATH"}, + Value: "", }, &cli.StringFlag{ - Name: "backend-assets-path", - DefaultText: "Path used to extract libraries that are required by some of the backends in runtime.", - EnvVars: []string{"BACKEND_ASSETS_PATH"}, - Value: "/tmp/localai/backend_data", + Name: "backend-assets-path", + Usage: 
"Path used to extract libraries that are required by some of the backends in runtime.", + EnvVars: []string{"BACKEND_ASSETS_PATH"}, + Value: "/tmp/localai/backend_data", }, &cli.IntFlag{ - Name: "context-size", - DefaultText: "Default context size of the model", - EnvVars: []string{"CONTEXT_SIZE"}, - Value: 512, + Name: "context-size", + Usage: "Default context size of the model", + EnvVars: []string{"CONTEXT_SIZE"}, + Value: 512, }, &cli.IntFlag{ - Name: "upload-limit", - DefaultText: "Default upload-limit. MB", - EnvVars: []string{"UPLOAD_LIMIT"}, - Value: 15, + Name: "upload-limit", + Usage: "Default upload-limit. MB", + EnvVars: []string{"UPLOAD_LIMIT"}, + Value: 15, }, }, Description: ` From 29856f75271f6c0fe9b0dd24f8b6cdc5f2842513 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sat, 3 Jun 2023 14:25:42 +0200 Subject: [PATCH 114/137] :arrow_up: Update nomic-ai/gpt4all (#479) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4dd3aed0..456a995d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=022f1cabe7dd2c911936b37510582f279069ba1e +GPT4ALL_VERSION?=f3564ac6b9b1d4bed1d5338e3689268419c773bc GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp RWKV_VERSION?=f25c89f8e55a67d57c01661a16abeed1b1c25016 From f8e2e76698131bc73cf61651667ab9f544ee1cce Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 3 Jun 2023 14:25:55 +0200 Subject: [PATCH 115/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 25ee51e (#478) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 
++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 7d820031..70aef969 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 6dacc0c4..1f40c8c6 100644 --- a/go.sum +++ b/go.sum @@ -161,6 +161,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d71 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230601213413-031d7149a7fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342 h1:Nca3BDITw9yrhMksPL5VKpj+nOUmDXTy7qB7tHJy0R8= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a h1:ALsGoIFe2IZLMD+y0/ds7Spn8e9qiucQ9hod0zTRmfk= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From b9f1f85433fe13b1dfcddd69dfb6f3c019635efd Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sat, 3 Jun 2023 18:30:18 +0200 Subject: [PATCH 116/137] :arrow_up: Update go-skynet/go-llama.cpp (#482) Signed-off-by: 
GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 456a995d..449eeed2 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=10caf37d8b73386708b4373975b8917e6b212c0e +GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=f3564ac6b9b1d4bed1d5338e3689268419c773bc GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c From 05edf59c919d0ba0fc154fc03024c26e2aa2ab3e Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sat, 3 Jun 2023 18:30:30 +0200 Subject: [PATCH 117/137] :arrow_up: Update nomic-ai/gpt4all (#483) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 449eeed2..10d9603d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=f3564ac6b9b1d4bed1d5338e3689268419c773bc +GPT4ALL_VERSION?=23391d44e0b99406906eabc4865ec12dd3c69bdc GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp RWKV_VERSION?=f25c89f8e55a67d57c01661a16abeed1b1c25016 From 8fb86c13bcaa922ad97e62e0babaabbdd5e5f31e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 3 Jun 2023 23:46:07 +0200 Subject: [PATCH 118/137] feat: Enable static builds for Linux binaries (#487) Signed-off-by: mudler --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6d564657..47057557 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -32,7 +32,7 @@ jobs: CMAKE_ARGS: "${{ 
matrix.defines }}" BUILD_ID: "${{ matrix.build }}" run: | - make dist + STATIC=true make dist - uses: actions/upload-artifact@v3 with: name: ${{ matrix.build }} From b64c1d8ac1ee2f2fe62d4282152230663abc357d Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Sun, 4 Jun 2023 01:56:59 +0200 Subject: [PATCH 119/137] :arrow_up: Update nomic-ai/gpt4all (#488) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 10d9603d..040fb2ef 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=23391d44e0b99406906eabc4865ec12dd3c69bdc +GPT4ALL_VERSION?=bc624f5389d656b1995b6db592f76f5853712cf6 GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp RWKV_VERSION?=f25c89f8e55a67d57c01661a16abeed1b1c25016 From 8a34679a13fbec941863763f8a353df3b9710cf1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 01:57:10 +0200 Subject: [PATCH 120/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3f10005 (#485) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 70aef969..48675b3b 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 - github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73 + github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 github.com/gofiber/fiber/v2 v2.46.0 
github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 1f40c8c6..c2f672fc 100644 --- a/go.sum +++ b/go.sum @@ -92,6 +92,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d h1:daPcVEpt github.com/go-skynet/go-llama.cpp v0.0.0-20230530191504-62b6c079a47d/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws= github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73 h1:swwsrYpPYOsyGFrX/0nhaYa93aHH6I61HpSJpQkN1tY= github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= +github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 h1:w+S5j+znKE8ZKogSp0tcdmYO/v94Wym0g9Os+iWEu2w= +github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 3892fafc2d5f6fb88dadcba8377c34d50eda77b0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 01:57:20 +0200 Subject: [PATCH 121/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to bc624f5 (#486) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 48675b3b..32f63aad 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a 
+ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603140917-bc624f5389d6 github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index c2f672fc..3b2550bb 100644 --- a/go.sum +++ b/go.sum @@ -165,6 +165,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6a github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230602151000-be9f6ad54342/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a h1:ALsGoIFe2IZLMD+y0/ds7Spn8e9qiucQ9hod0zTRmfk= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603140917-bc624f5389d6 h1:GcwtLT80QuxAC7Dg+EpCQv1k/2Abhw8kvxQn3vuit5Q= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603140917-bc624f5389d6/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From 1bb85377e4662b0cc67f83252d57abd8a4cb6802 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Jun 2023 14:00:21 +0200 Subject: [PATCH 122/137] feat: add ffmpeg images (#492) Signed-off-by: mudler --- .github/workflows/image.yml | 24 ++++++++++++++++++++++++ Dockerfile | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 38eed85a..baf18b42 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -22,18 +22,41 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' + ffmpeg: '' - build-type: 'cublas' cuda-major-version: 11 
cuda-minor-version: 7 platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda11' + ffmpeg: '' - build-type: 'cublas' cuda-major-version: 12 cuda-minor-version: 1 platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12' + ffmpeg: '' + - build-type: '' + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '' + ffmpeg: 'true' + - build-type: 'cublas' + cuda-major-version: 11 + cuda-minor-version: 7 + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda11-ffmpeg' + ffmpeg: 'true' + - build-type: 'cublas' + cuda-major-version: 12 + cuda-minor-version: 1 + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-cublas-cuda12-ffmpeg' + ffmpeg: 'true' + runs-on: ubuntu-latest steps: - name: Checkout @@ -77,6 +100,7 @@ jobs: BUILD_TYPE=${{ matrix.build-type }} CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }} CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }} + FFMPEG=${{ matrix.ffmpeg }} context: . file: ./Dockerfile platforms: ${{ matrix.platforms }} diff --git a/Dockerfile b/Dockerfile index beee4ea1..d1a81dbb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,6 +48,7 @@ ARG BUILD_TYPE= ARG GO_TAGS=stablediffusion ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 +ARG FFMPEG= ENV BUILD_TYPE=${BUILD_TYPE} ENV GO_TAGS=${GO_TAGS} @@ -73,6 +74,12 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get update && \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ ; fi + +# Add FFmpeg +RUN if [ "${FFMPEG}" = "true" ]; then \ + apt-get install -y ffmpeg \ + ; fi + ENV PATH /usr/local/cuda/bin:${PATH} # OpenBLAS requirements From 203fd7b2e8731d82031f02d1f443b352cd8c5d25 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 14:16:09 +0200 Subject: [PATCH 123/137] fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 6fb862c (#490) 
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 32f63aad..3ae0f8d9 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 - github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 + github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0 github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index 3b2550bb..13072895 100644 --- a/go.sum +++ b/go.sum @@ -76,6 +76,8 @@ github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb/go.mod h1:SI+oF2+THMydq8Vo4+EzKJaQwtfWOy+lr7yWPP6FR2U= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8 h1:LK1DAgJsNMRUWaPpFOnE8XSF70UBybr3zGOvzP8Pdok= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230531065233-17b065584ef8/go.mod h1:/JbU8HZU+tUOp+1bQAeXf3AyRXm+p3UwhccoJwCTI9A= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0 h1:PKwuqqVsvSPY4W9H9r3iHVpsmMWL1MQ7I5qpiY7eh0E= +github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0/go.mod h1:/JbU8HZU+tUOp+1bQAeXf3AyRXm+p3UwhccoJwCTI9A= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI= github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po= From 4ddc9564626a1abd6f665294bc361a3f4bb23cdb Mon 
Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Jun 2023 17:25:35 +0200 Subject: [PATCH 124/137] deps: update rwkv, switch back to upstream (#494) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 040fb2ef..44242f01 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=bc624f5389d656b1995b6db592f76f5853712cf6 GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c -RWKV_REPO?=https://github.com/mudler/go-rwkv.cpp -RWKV_VERSION?=f25c89f8e55a67d57c01661a16abeed1b1c25016 +RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp +RWKV_VERSION?=049c1b54798a0fb8429a0905060fa5e2d64255ca WHISPER_CPP_VERSION?=5b9e59bc07dd76320354f2af6be29f16dbcb21e7 BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f From a0e0ac887f5414c32ddfdce5bc0d24daa051781e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 19:36:56 +0200 Subject: [PATCH 125/137] fix(deps): update github.com/go-skynet/go-bert.cpp digest to 0548994 (#451) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 3ae0f8d9..3cfd7c8e 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd github.com/go-audio/wav v1.1.0 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf - github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 + github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0 github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 
github.com/gofiber/fiber/v2 v2.46.0 diff --git a/go.sum b/go.sum index 13072895..32d09e0b 100644 --- a/go.sum +++ b/go.sum @@ -64,6 +64,8 @@ github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 h1:eiE1CTqanNjpNWF2xp9GvNZXgKgRzNaUSyFZGMLu8Vo= github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972/go.mod h1:IQrVVZiAuWpneNrahrGu3m7VVaKLDIvQGp+Q6B8jw5g= +github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7 h1:hm5rOxRf2Y8zmQTBgtDabLoprYHHQHmZ8ui8i4KQSgU= +github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7/go.mod h1:55l02IF2kD+LGEH4yXzmPPygeuWiUIo8Nbh/+ZU9cb0= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b h1:uKICsAbdRJxMPZ4RXltwOwXPRDO1/d/pdGR3gEEUV9M= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c h1:jXUOCh2K4OzRItTtHzdxvkylE9r1szRSleRpXCNvraY= From cdf0a6e7667e1fb3412951f078aaf017a6fd6437 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 19:37:08 +0200 Subject: [PATCH 126/137] fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to bbe195e (#497) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 3cfd7c8e..39738c79 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 - github.com/nomic-ai/gpt4all/gpt4all-bindings/golang 
v0.0.0-20230603140917-bc624f5389d6 + github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230604125924-bbe195ee0207 github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 diff --git a/go.sum b/go.sum index 32d09e0b..4cb7e412 100644 --- a/go.sum +++ b/go.sum @@ -171,6 +171,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603001950-25ee51e2ca3a/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603140917-bc624f5389d6 h1:GcwtLT80QuxAC7Dg+EpCQv1k/2Abhw8kvxQn3vuit5Q= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230603140917-bc624f5389d6/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230604125924-bbe195ee0207 h1:3ObPrftXDNkEN5M87IXxRlhA13x/44CuVaHXppsNDUg= +github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230604125924-bbe195ee0207/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= From d9130def3901cfd7d9220311a44bf20a926b8c3b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Jun 2023 21:07:12 +0200 Subject: [PATCH 127/137] fix: correctly assign ffmpeg image tag (#499) --- .github/workflows/image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index baf18b42..8f28f332 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -39,8 +39,8 @@ jobs: ffmpeg: '' - build-type: '' platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '' + tag-latest: 'false' + tag-suffix: '-ffmpeg' 
ffmpeg: 'true' - build-type: 'cublas' cuda-major-version: 11 From 3070e9503a9b4d2ac46bec6e5e1261297b1423b1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:53:09 +0200 Subject: [PATCH 128/137] fix(deps): update github.com/go-skynet/bloomz.cpp digest to 1834e77 (#414) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 39738c79..edaa6222 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd github.com/go-audio/wav v1.1.0 - github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf + github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0 github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 diff --git a/go.sum b/go.sum index 4cb7e412..7c08248f 100644 --- a/go.sum +++ b/go.sum @@ -60,6 +60,8 @@ github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyr github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw= github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= +github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa h1:gxr68r/6EWroay4iI81jxqGCDbKotY4+CiwdUkBz2NQ= +github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 
h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU= github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY= github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 h1:eiE1CTqanNjpNWF2xp9GvNZXgKgRzNaUSyFZGMLu8Vo= From e873fc7b71efc660efe94b0531bac15938132712 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Mon, 5 Jun 2023 00:07:48 +0200 Subject: [PATCH 129/137] :arrow_up: Update go-skynet/go-ggml-transformers.cpp (#501) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 44242f01..90a48e94 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=bc624f5389d656b1995b6db592f76f5853712cf6 -GOGGMLTRANSFORMERS_VERSION?=17b065584ef8f3280b6286235f0db95eec8a4b1c +GOGGMLTRANSFORMERS_VERSION?=6fb862c72bc04568120e711b176defe116d3751e RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=049c1b54798a0fb8429a0905060fa5e2d64255ca WHISPER_CPP_VERSION?=5b9e59bc07dd76320354f2af6be29f16dbcb21e7 From b503725dc7c4d53d4d585dfda6c414e404273110 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Jun 2023 09:42:50 +0200 Subject: [PATCH 130/137] fix: downgrade gpt4all (#503) Signed-off-by: mudler --- Makefile | 8 +++++--- api/api_test.go | 32 +++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 90a48e94..6f42e0e7 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,8 @@ GOVET=$(GOCMD) vet BINARY_NAME=local-ai GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c -GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=bc624f5389d656b1995b6db592f76f5853712cf6 +GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all 
+GPT4ALL_VERSION?=f7498c9 GOGGMLTRANSFORMERS_VERSION?=6fb862c72bc04568120e711b176defe116d3751e RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=049c1b54798a0fb8429a0905060fa5e2d64255ca @@ -232,8 +232,10 @@ test-models/testmodel: cp tests/models_fixtures/* test-models test: prepare test-models/testmodel + cp -r backend-assets api cp tests/models_fixtures/* test-models - C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./api ./pkg + C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all" --flake-attempts 5 -v -r ./api ./pkg + C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg ## Help: help: ## Show this help. 
diff --git a/api/api_test.go b/api/api_test.go index e1d3f712..54118b88 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -3,6 +3,7 @@ package api_test import ( "bytes" "context" + "embed" "encoding/json" "fmt" "io/ioutil" @@ -95,6 +96,9 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[ return } +//go:embed backend-assets/* +var backendAssets embed.FS + var _ = Describe("API test", func() { var app *fiber.App @@ -114,7 +118,7 @@ var _ = Describe("API test", func() { modelLoader = model.NewModelLoader(tmpdir) c, cancel = context.WithCancel(context.Background()) - app, err = App(WithContext(c), WithModelLoader(modelLoader)) + app, err = App(WithContext(c), WithModelLoader(modelLoader), WithBackendAssets(backendAssets), WithBackendAssetsOutput(tmpdir)) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -191,6 +195,32 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred()) Expect(content["backend"]).To(Equal("bert-embeddings")) }) + It("runs gpt4all", Label("gpt4all"), func() { + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } + + response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ + URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", + Name: "gpt4all-j", + Overrides: map[string]string{}, + }) + + Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) + + uuid := response["uuid"].(string) + + Eventually(func() bool { + response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) + fmt.Println(response) + return response["processed"].(bool) + }, "360s").Should(Equal(true)) + + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1)) + 
Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) + }) }) }) From ec4fd1d219534710da690df1c7141f5feb3deefb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Jun 2023 14:26:20 +0200 Subject: [PATCH 131/137] fix gpt4all, add metal GPU support (#507) --- Makefile | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6f42e0e7..4df01906 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=3f10005b70c657c317d2cae4c22a9bd295f54a3c +GOLLAMA_VERSION?=b1a425611fde4c5cf8f6ff523088304cfe49d6f5 GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all GPT4ALL_VERSION?=f7498c9 GOGGMLTRANSFORMERS_VERSION?=6fb862c72bc04568120e711b176defe116d3751e @@ -12,7 +12,7 @@ RWKV_VERSION?=049c1b54798a0fb8429a0905060fa5e2d64255ca WHISPER_CPP_VERSION?=5b9e59bc07dd76320354f2af6be29f16dbcb21e7 BERT_VERSION?=0548994371f7081e45fcf8d472f3941a12f179aa BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f -BUILD_TYPE?= +export BUILD_TYPE?= CGO_LDFLAGS?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc @@ -41,6 +41,11 @@ ifeq ($(BUILD_TYPE),cublas) export LLAMA_CUBLAS=1 endif +ifeq ($(BUILD_TYPE),metal) + CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders + export LLAMA_METAL=1 +endif + ifeq ($(BUILD_TYPE),clblas) CGO_LDFLAGS+=-lOpenCL -lclblast endif @@ -66,6 +71,12 @@ gpt4all: @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + + @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + + @find ./gpt4all -type f 
-name "*.h" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + + @find ./gpt4all/gpt4all-backend -type f -name "llama_util.h" -execdir mv {} "llama_gpt4all_util.h" \; + @find ./gpt4all -type f -name "*.cmake" -exec sed -i'' -e 's/llama_util/llama_gpt4all_util/g' {} + + @find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_util/llama_gpt4all_util/g' {} + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.cpp" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + From b447a2a7194e11de2845d2dad42033125bc9bfad Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Jun 2023 17:21:38 +0200 Subject: [PATCH 132/137] feat: support upscaled image generation with esrgan (#509) --- Makefile | 2 +- go.mod | 2 +- pkg/stablediffusion/generate.go | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4df01906..51238fd9 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f export BUILD_TYPE?= CGO_LDFLAGS?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ -STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc +STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632 GO_TAGS?= BUILD_ID?=git LD_FLAGS=?= diff --git a/go.mod b/go.mod index edaa6222..d9666c37 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 - github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 + github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230604125924-bbe195ee0207 github.com/onsi/ginkgo/v2 v2.9.7 
github.com/onsi/gomega v1.27.7 diff --git a/pkg/stablediffusion/generate.go b/pkg/stablediffusion/generate.go index 97989e90..cef96e80 100644 --- a/pkg/stablediffusion/generate.go +++ b/pkg/stablediffusion/generate.go @@ -8,6 +8,18 @@ import ( ) func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error { + if height > 512 || width > 512 { + return stableDiffusion.GenerateImageUpscaled( + height, + width, + step, + seed, + positive_prompt, + negative_prompt, + dst, + asset_dir, + ) + } return stableDiffusion.GenerateImage( height, width, From ac17d544e087a8f2926d882679950e86f3ace981 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:21:57 +0200 Subject: [PATCH 133/137] fix(deps): update github.com/go-skynet/go-llama.cpp digest to b1a4256 (#505) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d9666c37..c0569db7 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230604074754-6fb862c72bc0 - github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6 + github.com/go-skynet/go-llama.cpp v0.0.0-20230604235446-b1a425611fde github.com/gofiber/fiber/v2 v2.46.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 7c08248f..5c4347c8 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73 h1:swwsrYpP github.com/go-skynet/go-llama.cpp v0.0.0-20230531065249-10caf37d8b73/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= github.com/go-skynet/go-llama.cpp 
v0.0.0-20230603122627-3f10005b70c6 h1:w+S5j+znKE8ZKogSp0tcdmYO/v94Wym0g9Os+iWEu2w= github.com/go-skynet/go-llama.cpp v0.0.0-20230603122627-3f10005b70c6/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= +github.com/go-skynet/go-llama.cpp v0.0.0-20230604235446-b1a425611fde h1:bnWCcst0K5lgK2MCJbxV81xPSiK4fiob9f4k2RjYN8A= +github.com/go-skynet/go-llama.cpp v0.0.0-20230604235446-b1a425611fde/go.mod h1:ddYIvPZyj3Vf4XkfZimVRRehZu2isd0JXfK3EemVQPk= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= From 01f5046cafb1784a277a416296ff9f6cd96a4711 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:22:21 +0200 Subject: [PATCH 134/137] fix(deps): update github.com/tmc/langchaingo digest to 4afed6d (#508) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c0569db7..c6aa40f6 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/rs/zerolog v1.29.1 github.com/sashabaranov/go-openai v1.9.5 github.com/swaggo/swag v1.16.1 - github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 + github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a github.com/urfave/cli/v2 v2.25.5 github.com/valyala/fasthttp v1.47.0 gopkg.in/yaml.v2 v2.4.0 diff --git a/go.sum b/go.sum index 5c4347c8..612f47e1 100644 --- a/go.sum +++ b/go.sum @@ -220,6 +220,8 @@ github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841 
h1:IVlfKPZzq3W1G+CkhZgN5VjmHnAeB3YqEvxyNPPCZXY= github.com/tmc/langchaingo v0.0.0-20230530193922-fb062652f841/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= +github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a h1:YtKJTKbM3qu60+ZxLtyeCl0RvdG7LKbyF8TT7nzV6Gg= +github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/urfave/cli/v2 v2.25.5 h1:d0NIAyhh5shGscroL7ek/Ya9QYQE0KNabJgiUinIQkc= From 3ebdb9b67ef2bb6f661222dbdfb165d01405aa3a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:24:04 +0200 Subject: [PATCH 135/137] fix(deps): update module github.com/sashabaranov/go-openai to v1.10.0 (#510) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c6aa40f6..e3eef414 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/onsi/gomega v1.27.7 github.com/otiai10/openaigo v1.1.0 github.com/rs/zerolog v1.29.1 - github.com/sashabaranov/go-openai v1.9.5 + github.com/sashabaranov/go-openai v1.10.0 github.com/swaggo/swag v1.16.1 github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a github.com/urfave/cli/v2 v2.25.5 diff --git a/go.sum b/go.sum index 612f47e1..e5e2a6a4 100644 --- a/go.sum +++ b/go.sum @@ -147,6 +147,8 @@ github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWV github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs= github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod 
h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= +github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90 h1:rxKtdI8RCZ41ZNbUh9jyBBy2pi3ukQP88ZzsrSVnpxY= +github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA= @@ -204,6 +206,8 @@ github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9 github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.9.5 h1:z1VCMXsfnug+U0ceTTIXr/L26AYl9jafqA9lptlSX0c= github.com/sashabaranov/go-openai v1.9.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sashabaranov/go-openai v1.10.0 h1:uUD3EOKDdGa6geMVbe2Trj9/ckF9sCV5jpQM19f7GM8= +github.com/sashabaranov/go-openai v1.10.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8= github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4= From 2e916abe15e6750732a122a2ebd1068dbb211471 Mon Sep 17 00:00:00 2001 From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com> Date: Tue, 6 Jun 2023 00:35:01 +0200 Subject: [PATCH 136/137] :arrow_up: Update go-skynet/go-llama.cpp (#512) Signed-off-by: GitHub Co-authored-by: mudler --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51238fd9..e39fb481 
100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=b1a425611fde4c5cf8f6ff523088304cfe49d6f5 +GOLLAMA_VERSION?=cca84ed55fb920ccdd6158958b2c9b773ce17eea GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all GPT4ALL_VERSION?=f7498c9 GOGGMLTRANSFORMERS_VERSION?=6fb862c72bc04568120e711b176defe116d3751e From e1e23a6302548a898a8526a9e37ed28ca2d1ba00 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 00:35:16 +0200 Subject: [PATCH 137/137] fix(deps): update github.com/mudler/go-stable-diffusion digest to d89260f (#506) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index e3eef414..2cf95cc7 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 github.com/imdario/mergo v0.3.16 - github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90 + github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230604125924-bbe195ee0207 github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 diff --git a/go.sum b/go.sum index e5e2a6a4..5e5a6ad2 100644 --- a/go.sum +++ b/go.sum @@ -149,6 +149,8 @@ github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90 h1:rxKtdI8RCZ41ZNbUh9jyBBy2pi3ukQP88ZzsrSVnpxY= github.com/mudler/go-stable-diffusion v0.0.0-20230605114250-a6706a426a90/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af 
h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= +github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA=