From cc9aa9eb3ff6bc511a4988379dbde1cc853d1239 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Thu, 18 May 2023 15:59:03 +0200
Subject: [PATCH 01/32] feat: add /models/apply endpoint to prepare models
 (#286)

---
 Makefile                                      |   6 +-
 README.md                                     |   7 +-
 api/api.go                                    |  27 +-
 api/api_test.go                               |  13 +-
 api/config.go                                 |  44 +++-
 api/gallery.go                                | 146 +++++++++++
 api/openai.go                                 |  16 +-
 main.go                                       |   7 +-
 pkg/gallery/gallery_suite_test.go             |  13 +
 pkg/gallery/models.go                         | 237 ++++++++++++++++++
 pkg/gallery/models_test.go                    |  30 +++
 pkg/model/initializers.go                     |   3 +-
 tests/fixtures/gallery_simple.yaml            |  40 +++
 .../completion.tmpl                           |   0
 .../{fixtures => models_fixtures}/config.yaml |   0
 .../embeddings.yaml                           |   0
 .../ggml-gpt4all-j.tmpl                       |   0
 tests/{fixtures => models_fixtures}/gpt4.yaml |   0
 .../{fixtures => models_fixtures}/gpt4_2.yaml |   0
 tests/{fixtures => models_fixtures}/rwkv.yaml |   0
 .../rwkv_chat.tmpl                            |   0
 .../rwkv_completion.tmpl                      |   0
 .../whisper.yaml                              |   0
 23 files changed, 556 insertions(+), 33 deletions(-)
 create mode 100644 api/gallery.go
 create mode 100644 pkg/gallery/gallery_suite_test.go
 create mode 100644 pkg/gallery/models.go
 create mode 100644 pkg/gallery/models_test.go
 create mode 100644 tests/fixtures/gallery_simple.yaml
 rename tests/{fixtures => models_fixtures}/completion.tmpl (100%)
 rename tests/{fixtures => models_fixtures}/config.yaml (100%)
 rename tests/{fixtures => models_fixtures}/embeddings.yaml (100%)
 rename tests/{fixtures => models_fixtures}/ggml-gpt4all-j.tmpl (100%)
 rename tests/{fixtures => models_fixtures}/gpt4.yaml (100%)
 rename tests/{fixtures => models_fixtures}/gpt4_2.yaml (100%)
 rename tests/{fixtures => models_fixtures}/rwkv.yaml (100%)
 rename tests/{fixtures => models_fixtures}/rwkv_chat.tmpl (100%)
 rename tests/{fixtures => models_fixtures}/rwkv_completion.tmpl (100%)
 rename tests/{fixtures => models_fixtures}/whisper.yaml (100%)

diff --git a/Makefile b/Makefile
index 523f1523..ea750230 100644
--- a/Makefile
+++ b/Makefile
@@ -211,11 +211,11 @@ test-models/testmodel:
 	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
 	wget https://huggingface.co/imxcstar/rwkv-4-raven-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096-16_Q4_2.bin -O test-models/rwkv
 	wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
-	cp tests/fixtures/* test-models
+	cp tests/models_fixtures/* test-models
 
 test: prepare test-models/testmodel
-	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
+	cp tests/models_fixtures/* test-models
+	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api ./pkg
 
 ## Help:
 help: ## Show this help.
diff --git a/README.md b/README.md
index 1cbb4143..3a4cbb95 100644
--- a/README.md
+++ b/README.md
@@ -11,11 +11,12 @@
 
 **LocalAI** is a drop-in replacement REST API compatible with OpenAI API specifications for local inferencing. It allows to run models locally or on-prem with consumer grade hardware, supporting multiple models families compatible with the `ggml` format. For a list of the supported model families, see [the model compatibility table below](https://github.com/go-skynet/LocalAI#model-compatibility-table).
 
-- OpenAI drop-in alternative REST API
+- Local, OpenAI drop-in alternative REST API. You own your data.
 - Supports multiple models, Audio transcription, Text generation with GPTs, Image generation with stable diffusion (experimental)
 - Once loaded the first time, it keep models loaded in memory for faster inference
 - Support for prompt templates
 - Doesn't shell-out, but uses C++ bindings for a faster inference and better performance. 
+- NO GPU required. NO Internet access is required either. Optional GPU acceleration is available for `llama.cpp`-compatible LLMs. [See building instructions](https://github.com/go-skynet/LocalAI#cublas).
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
@@ -434,7 +435,7 @@ local-ai --models-path <model_path> [--address <address>] [--threads <num_thread
 | debug | DEBUG         | false           | Enable debug mode. |
 | config-file | CONFIG_FILE         | empty           | Path to a LocalAI config file. |
 | upload_limit | UPLOAD_LIMIT         | 5MB           | Upload limit for whisper. |
-| image-dir | CONFIG_FILE         | empty           | Image directory to store and serve processed images. |
+| image-path | IMAGE_PATH         | empty           | Image directory to store and serve processed images. |
 
 </details>
 
@@ -567,6 +568,8 @@ Note: CuBLAS support is experimental, and has not been tested on real HW. please
 make BUILD_TYPE=cublas build
 ```
 
+More information is available in the upstream PR: https://github.com/ggerganov/llama.cpp/pull/1412
+
 </details>
 
 ### Windows compatibility
diff --git a/api/api.go b/api/api.go
index ecf56b09..ec7c9815 100644
--- a/api/api.go
+++ b/api/api.go
@@ -1,6 +1,7 @@
 package api
 
 import (
+	"context"
 	"errors"
 
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -12,7 +13,7 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
+func App(c context.Context, configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
 	zerolog.SetGlobalLevel(zerolog.InfoLevel)
 	if debug {
 		zerolog.SetGlobalLevel(zerolog.DebugLevel)
@@ -48,7 +49,7 @@ func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, c
 		}))
 	}
 
-	cm := make(ConfigMerger)
+	cm := NewConfigMerger()
 	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
 		log.Error().Msgf("error loading config files: %s", err.Error())
 	}
@@ -60,39 +61,51 @@ func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, c
 	}
 
 	if debug {
-		for k, v := range cm {
-			log.Debug().Msgf("Model: %s (config: %+v)", k, v)
+		for _, v := range cm.ListConfigs() {
+			cfg, _ := cm.GetConfig(v)
+			log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
 		}
 	}
 	// Default middleware config
 	app.Use(recover.New())
 	app.Use(cors.New())
 
+	// LocalAI API endpoints
+	applier := newGalleryApplier(loader.ModelPath)
+	applier.start(c, cm)
+	app.Post("/models/apply", applyModelGallery(loader.ModelPath, cm, applier.C))
+	app.Get("/models/jobs/:uid", getOpStatus(applier))
+
 	// openAI compatible API endpoint
+
+	// chat
 	app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// edit
 	app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// completion
 	app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// embeddings
 	app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
-
-	// /v1/engines/{engine_id}/embeddings
-
 	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// audio
 	app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// images
 	app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir))
 
 	if imageDir != "" {
 		app.Static("/generated-images", imageDir)
 	}
 
+	// models
 	app.Get("/v1/models", listModels(loader, cm))
 	app.Get("/models", listModels(loader, cm))
 
diff --git a/api/api_test.go b/api/api_test.go
index f2af0388..1a5d7d40 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -22,10 +22,14 @@ var _ = Describe("API test", func() {
 	var modelLoader *model.ModelLoader
 	var client *openai.Client
 	var client2 *openaigo.Client
+	var c context.Context
+	var cancel context.CancelFunc
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App("", modelLoader, 15, 1, 512, false, true, true, "")
+			c, cancel = context.WithCancel(context.Background())
+
+			app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "")
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")
@@ -42,6 +46,7 @@ var _ = Describe("API test", func() {
 			}, "2m").ShouldNot(HaveOccurred())
 		})
 		AfterEach(func() {
+			cancel()
 			app.Shutdown()
 		})
 		It("returns the models list", func() {
@@ -140,7 +145,9 @@ var _ = Describe("API test", func() {
 	Context("Config file", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
+			c, cancel = context.WithCancel(context.Background())
+
+			app = App(c, os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")
@@ -155,10 +162,10 @@ var _ = Describe("API test", func() {
 			}, "2m").ShouldNot(HaveOccurred())
 		})
 		AfterEach(func() {
+			cancel()
 			app.Shutdown()
 		})
 		It("can generate chat completions from config file", func() {
-
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(models.Models)).To(Equal(12))
diff --git a/api/config.go b/api/config.go
index 7379978e..7e0d8264 100644
--- a/api/config.go
+++ b/api/config.go
@@ -7,6 +7,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"
 
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
@@ -43,8 +44,16 @@ type TemplateConfig struct {
 	Edit       string `yaml:"edit"`
 }
 
-type ConfigMerger map[string]Config
+type ConfigMerger struct {
+	configs map[string]Config // model configurations keyed by model name
+	sync.Mutex                // meant to guard configs; only effective when methods use pointer receivers
+}
 
+func NewConfigMerger() *ConfigMerger {
+	return &ConfigMerger{
+		configs: make(map[string]Config),
+	}
+}
 func ReadConfigFile(file string) ([]*Config, error) {
 	c := &[]*Config{}
 	f, err := os.ReadFile(file)
@@ -72,28 +81,51 @@ func ReadConfig(file string) (*Config, error) {
 }
 
 func (cm ConfigMerger) LoadConfigFile(file string) error {
+	cm.Lock()
+	defer cm.Unlock()
 	c, err := ReadConfigFile(file)
 	if err != nil {
 		return fmt.Errorf("cannot load config file: %w", err)
 	}
 
 	for _, cc := range c {
-		cm[cc.Name] = *cc
+		cm.configs[cc.Name] = *cc
 	}
 	return nil
 }
 
 func (cm ConfigMerger) LoadConfig(file string) error {
+	cm.Lock()
+	defer cm.Unlock()
 	c, err := ReadConfig(file)
 	if err != nil {
 		return fmt.Errorf("cannot read config file: %w", err)
 	}
 
-	cm[c.Name] = *c
+	cm.configs[c.Name] = *c
 	return nil
 }
 
+func (cm ConfigMerger) GetConfig(m string) (Config, bool) {
+	cm.Lock()
+	defer cm.Unlock()
+	v, exists := cm.configs[m]
+	return v, exists
+}
+
+func (cm ConfigMerger) ListConfigs() []string {
+	cm.Lock()
+	defer cm.Unlock()
+	var res []string
+	for k := range cm.configs {
+		res = append(res, k)
+	}
+	return res
+}
+
 func (cm ConfigMerger) LoadConfigs(path string) error {
+	cm.Lock()
+	defer cm.Unlock()
 	files, err := ioutil.ReadDir(path)
 	if err != nil {
 		return err
@@ -106,7 +138,7 @@ func (cm ConfigMerger) LoadConfigs(path string) error {
 		}
 		c, err := ReadConfig(filepath.Join(path, file.Name()))
 		if err == nil {
-			cm[c.Name] = *c
+			cm.configs[c.Name] = *c
 		}
 	}
 
@@ -253,7 +285,7 @@ func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (strin
 	return modelFile, input, nil
 }
 
-func readConfig(modelFile string, input *OpenAIRequest, cm ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
+func readConfig(modelFile string, input *OpenAIRequest, cm *ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
 	// Load a config file if present after the model name
 	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
 	if _, err := os.Stat(modelConfig); err == nil {
@@ -263,7 +295,7 @@ func readConfig(modelFile string, input *OpenAIRequest, cm ConfigMerger, loader
 	}
 
 	var config *Config
-	cfg, exists := cm[modelFile]
+	cfg, exists := cm.GetConfig(modelFile)
 	if !exists {
 		config = &Config{
 			OpenAIRequest: defaultRequest(modelFile),
diff --git a/api/gallery.go b/api/gallery.go
new file mode 100644
index 00000000..5378c7bc
--- /dev/null
+++ b/api/gallery.go
@@ -0,0 +1,146 @@
+package api
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"sync"
+
+	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
+	"gopkg.in/yaml.v3"
+)
+
+type galleryOp struct {
+	req ApplyGalleryModelRequest // payload received by the apply endpoint
+	id  string                   // operation id returned to the client; also the key used for the status map
+}
+
+type galleryOpStatus struct {
+	Error     error  `json:"error"`     // failure cause; NOTE(review): most error values have no exported fields and likely render as {} in JSON
+	Processed bool   `json:"processed"` // true once the operation ended, successfully or not
+	Message   string `json:"message"`   // "processing" while running, "completed" on success
+}
+
+type galleryApplier struct {
+	modelPath string
+	sync.Mutex
+	C        chan galleryOp              // operations sent by the apply endpoint and consumed by start
+	statuses map[string]*galleryOpStatus // latest status per operation id; accessed under the embedded mutex
+}
+
+// newGalleryApplier builds an applier working on modelPath; start must be called to consume operations from C.
+func newGalleryApplier(modelPath string) *galleryApplier {
+	applier := &galleryApplier{modelPath: modelPath}
+	applier.C = make(chan galleryOp)
+	applier.statuses = make(map[string]*galleryOpStatus)
+	return applier
+}
+func (g *galleryApplier) updatestatus(s string, op *galleryOpStatus) {
+	g.Lock() // the map is also read by getstatus, which the status handler calls
+	g.statuses[s] = op
+	g.Unlock()
+}
+
+// getstatus returns the last status recorded for operation s, or nil when s is unknown.
+func (g *galleryApplier) getstatus(s string) *galleryOpStatus {
+	g.Lock()
+	defer g.Unlock()
+	return g.statuses[s]
+}
+
+// start launches the worker goroutine: it applies the operations received on g.C one at a time until c is cancelled.
+func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
+	go func() {
+		for {
+			select {
+			case <-c.Done():
+				return
+			case op := <-g.C:
+				g.updatestatus(op.id, &galleryOpStatus{Message: "processing"})
+				// Most error values marshal to "{}" in JSON, so the text is mirrored in Message.
+				updateError := func(e error) {
+					g.updatestatus(op.id, &galleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()})
+				}
+				response, err := http.Get(op.req.URL)
+				if err != nil {
+					updateError(err)
+					continue
+				}
+				// Read and close the body right away: a defer in this endless loop would only run at goroutine exit.
+				body, err := ioutil.ReadAll(response.Body)
+				response.Body.Close()
+				if err != nil {
+					updateError(err)
+					continue
+				}
+				// A non-200 answer carries an error page, not a gallery definition.
+				if response.StatusCode != http.StatusOK {
+					updateError(fmt.Errorf("failed to fetch %q: unexpected status %s", op.req.URL, response.Status))
+					continue
+				}
+				// Unmarshal YAML data into a Config struct
+				var config gallery.Config
+				if err := yaml.Unmarshal(body, &config); err != nil {
+					updateError(fmt.Errorf("failed to unmarshal YAML: %w", err))
+					continue
+				}
+				if err := gallery.Apply(g.modelPath, op.req.Name, &config); err != nil {
+					updateError(err)
+					continue
+				}
+				// Reload models
+				if err := cm.LoadConfigs(g.modelPath); err != nil {
+					updateError(err)
+					continue
+				}
+
+				g.updatestatus(op.id, &galleryOpStatus{Processed: true, Message: "completed"})
+			}
+		}
+	}()
+}
+
+// endpoints
+
+type ApplyGalleryModelRequest struct {
+	URL  string `json:"url"`  // location of the gallery YAML definition fetched by the worker
+	Name string `json:"name"` // passed to gallery.Apply as the name override; may be empty
+}
+
+// getOpStatus builds the handler of the job status route: it looks up the
+// operation named by the "uid" route parameter and returns its status as JSON.
+// An unknown id is reported as an error.
+func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		if status := g.getstatus(c.Params("uid")); status != nil {
+			return c.JSON(status)
+		}
+		return fmt.Errorf("could not find any status for ID")
+	}
+}
+
+// applyModelGallery builds the handler of the apply route: it sends the requested
+// gallery operation on g and answers with the operation id and its status URL.
+func applyModelGallery(modelPath string, cm *ConfigMerger, g chan galleryOp) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		// Get input data from the request body
+		var input ApplyGalleryModelRequest
+		if err := c.BodyParser(&input); err != nil {
+			return err
+		}
+		id, err := uuid.NewUUID()
+		if err != nil {
+			return err
+		}
+		jobID := id.String()
+		// The send waits for a receiver when g is unbuffered, as the channel made by newGalleryApplier is.
+		g <- galleryOp{req: input, id: jobID}
+		return c.JSON(struct {
+			ID        string `json:"uid"`
+			StatusURL string `json:"status"`
+		}{ID: jobID, StatusURL: c.BaseURL() + "/models/jobs/" + jobID})
+	}
+}
diff --git a/api/openai.go b/api/openai.go
index 52d65976..0a85349c 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -142,7 +142,7 @@ func defaultRequest(modelFile string) OpenAIRequest {
 }
 
 // https://platform.openai.com/docs/api-reference/completions
-func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 
 		model, input, err := readInput(c, loader, true)
@@ -199,7 +199,7 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 }
 
 // https://platform.openai.com/docs/api-reference/embeddings
-func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, loader, true)
 		if err != nil {
@@ -256,7 +256,7 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 	}
 }
 
-func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 
 	process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
 		ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
@@ -378,7 +378,7 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 	}
 }
 
-func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, loader, true)
 		if err != nil {
@@ -449,7 +449,7 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 
 *
 */
-func imageEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
+func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, loader, false)
 		if err != nil {
@@ -574,7 +574,7 @@ func imageEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, image
 }
 
 // https://platform.openai.com/docs/api-reference/audio/create
-func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+func transcriptEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, loader, false)
 		if err != nil {
@@ -641,7 +641,7 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 	}
 }
 
-func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
+func listModels(loader *model.ModelLoader, cm *ConfigMerger) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		models, err := loader.ListModels()
 		if err != nil {
@@ -655,7 +655,7 @@ func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx)
 			dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
 		}
 
-		for k := range cm {
+		for _, k := range cm.ListConfigs() {
 			if _, exists := mm[k]; !exists {
 				dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
 			}
diff --git a/main.go b/main.go
index 2490e198..f3ffc033 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -57,9 +58,9 @@ func main() {
 				Value:       ":8080",
 			},
 			&cli.StringFlag{
-				Name:        "image-dir",
+				Name:        "image-path",
 				DefaultText: "Image directory",
-				EnvVars:     []string{"IMAGE_DIR"},
+				EnvVars:     []string{"IMAGE_PATH"},
 				Value:       "",
 			},
 			&cli.IntFlag{
@@ -93,7 +94,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
 		Copyright: "go-skynet authors",
 		Action: func(ctx *cli.Context) error {
 			fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
-			return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-dir")).Listen(ctx.String("address"))
+			return api.App(context.Background(), ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-path")).Listen(ctx.String("address"))
 		},
 	}
 
diff --git a/pkg/gallery/gallery_suite_test.go b/pkg/gallery/gallery_suite_test.go
new file mode 100644
index 00000000..44256bc2
--- /dev/null
+++ b/pkg/gallery/gallery_suite_test.go
@@ -0,0 +1,13 @@
+package gallery_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestGallery(t *testing.T) {
+	RegisterFailHandler(Fail) // hand failed gomega assertions over to ginkgo's Fail
+	RunSpecs(t, "Gallery test suite")
+}
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
new file mode 100644
index 00000000..bd9e1371
--- /dev/null
+++ b/pkg/gallery/models.go
@@ -0,0 +1,237 @@
+package gallery
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+
+	"github.com/rs/zerolog/log"
+	"gopkg.in/yaml.v2"
+)
+
+/*
+
+description: |
+    foo
+license: ""
+
+urls:
+-
+-
+
+name: "bar"
+
+config_file: |
+    # Note, name will be injected. or generated by the alias wanted by the user
+    threads: 14
+
+files:
+    - filename: ""
+      sha256: ""
+      uri: ""
+
+prompt_templates:
+    - name: ""
+      content: ""
+
+*/
+
+type Config struct {
+	Description     string           `yaml:"description"`
+	License         string           `yaml:"license"`
+	URLs            []string         `yaml:"urls"`
+	Name            string           `yaml:"name"`             // model name, unless Apply receives a name override
+	ConfigFile      string           `yaml:"config_file"`      // YAML text of the model config; Apply sets its "name" key
+	Files           []File           `yaml:"files"`            // files Apply downloads into the base path
+	PromptTemplates []PromptTemplate `yaml:"prompt_templates"` // each one is written by Apply as "<name>.tmpl"
+}
+
+type File struct {
+	Filename string `yaml:"filename"` // destination path, joined onto the base path given to Apply
+	SHA256   string `yaml:"sha256"`   // expected hex SHA256 of the content; empty skips verification
+	URI      string `yaml:"uri"`      // URL the file is downloaded from
+}
+
+type PromptTemplate struct {
+	Name    string `yaml:"name"`    // file name of the template, without the ".tmpl" extension
+	Content string `yaml:"content"` // text written to the template file
+}
+
+// ReadConfigFile loads the gallery model definition stored in the YAML file at
+// filePath. It returns an error when the file cannot be read or parsed.
+func ReadConfigFile(filePath string) (*Config, error) {
+	raw, readErr := os.ReadFile(filePath)
+	if readErr != nil {
+		return nil, fmt.Errorf("failed to read YAML file: %v", readErr)
+	}
+
+	// Decode the YAML document into a fresh Config
+	parsed := new(Config)
+	if parseErr := yaml.Unmarshal(raw, parsed); parseErr != nil {
+		return nil, fmt.Errorf("failed to unmarshal YAML: %v", parseErr)
+	}
+
+	return parsed, nil
+}
+
+// Apply installs the model described by config under basePath: it downloads the listed files
+// (verifying the SHA256 when one is given), writes each prompt template as "<name>.tmpl" and the
+// model config as "<name>.yaml". Names come from the gallery file, so every path is confined to basePath.
+func Apply(basePath, nameOverride string, config *Config) error {
+	// Create base path if it doesn't exist
+	err := os.MkdirAll(basePath, 0755)
+	if err != nil {
+		return fmt.Errorf("failed to create base path: %v", err)
+	}
+
+	// Download files and verify their SHA
+	for _, file := range config.Files {
+		log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
+		// Cleaning the name as a rooted path drops every leading "..": the result cannot leave basePath
+		filePath := filepath.Join(basePath, filepath.Clean("/"+file.Filename))
+
+		// Check if the file already exists
+		_, err := os.Stat(filePath)
+		if err == nil {
+			// File exists, check SHA
+			if file.SHA256 != "" {
+				// Verify SHA
+				calculatedSHA, err := calculateSHA(filePath)
+				if err != nil {
+					return fmt.Errorf("failed to calculate SHA for file %q: %v", file.Filename, err)
+				}
+				if calculatedSHA == file.SHA256 {
+					// SHA matches, skip downloading
+					log.Debug().Msgf("File %q already exists and matches the SHA. Skipping download", file.Filename)
+					continue
+				}
+				// SHA doesn't match, delete the file and download again
+				err = os.Remove(filePath)
+				if err != nil {
+					return fmt.Errorf("failed to remove existing file %q: %v", file.Filename, err)
+				}
+				log.Debug().Msgf("Removed %q (SHA doesn't match)", filePath)
+			} else {
+				// SHA is missing, skip downloading
+				log.Debug().Msgf("File %q already exists. Skipping download", file.Filename)
+				continue
+			}
+		} else if !os.IsNotExist(err) {
+			// Error occurred while checking file existence
+			return fmt.Errorf("failed to check file %q existence: %v", file.Filename, err)
+		}
+
+		log.Debug().Msgf("Downloading %q", file.URI)
+		if err := downloadFile(file, filePath); err != nil {
+			return err
+		}
+		log.Debug().Msgf("File %q downloaded and verified", file.Filename)
+	}
+
+	// Write prompt template contents to separate files
+	for _, template := range config.PromptTemplates {
+		// Create file path
+		filePath := filepath.Join(basePath, filepath.Clean("/"+template.Name+".tmpl"))
+		// Create parent directory
+		err := os.MkdirAll(filepath.Dir(filePath), 0755)
+		if err != nil {
+			return fmt.Errorf("failed to create parent directory for prompt template %q: %v", template.Name, err)
+		}
+		// Create and write file content
+		err = os.WriteFile(filePath, []byte(template.Content), 0644)
+		if err != nil {
+			return fmt.Errorf("failed to write prompt template %q: %v", template.Name, err)
+		}
+		log.Debug().Msgf("Prompt template %q written", template.Name)
+	}
+
+	name := config.Name
+	if nameOverride != "" {
+		name = nameOverride
+	}
+
+	configFilePath := filepath.Join(basePath, filepath.Clean("/"+name+".yaml"))
+
+	// Read and update config file as map[string]interface{}
+	configMap := make(map[string]interface{})
+	err = yaml.Unmarshal([]byte(config.ConfigFile), &configMap)
+	if err != nil {
+		return fmt.Errorf("failed to unmarshal config YAML: %v", err)
+	}
+
+	configMap["name"] = name
+	// Write updated config file
+	updatedConfigYAML, err := yaml.Marshal(configMap)
+	if err != nil {
+		return fmt.Errorf("failed to marshal updated config YAML: %v", err)
+	}
+
+	err = os.WriteFile(configFilePath, updatedConfigYAML, 0644)
+	if err != nil {
+		return fmt.Errorf("failed to write updated config file: %v", err)
+	}
+
+	log.Debug().Msgf("Written config file %s", configFilePath)
+	return nil
+}
+
+// downloadFile fetches file.URI into filePath, checking the SHA256 when the gallery provides one.
+// As a function of its own, its deferred Close calls run per file instead of piling up in Apply's loop.
+func downloadFile(file File, filePath string) (err error) {
+	resp, err := http.Get(file.URI)
+	if err != nil {
+		return fmt.Errorf("failed to download file %q: %v", file.Filename, err)
+	}
+	defer resp.Body.Close()
+	// An HTTP error page must not be stored as if it were the file
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("failed to download file %q: unexpected status %s", file.Filename, resp.Status)
+	}
+	// Create parent directory
+	if err = os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
+		return fmt.Errorf("failed to create parent directory for file %q: %v", file.Filename, err)
+	}
+	outFile, err := os.Create(filePath)
+	if err != nil {
+		return fmt.Errorf("failed to create file %q: %v", file.Filename, err)
+	}
+	defer func() {
+		outFile.Close()
+		if err != nil {
+			os.Remove(filePath) // a partial or corrupted file would be taken for a complete one by later runs
+		}
+	}()
+	// Hash while writing only when there is a SHA to compare against
+	hash, dst := sha256.New(), io.Writer(outFile)
+	if file.SHA256 != "" {
+		log.Debug().Msgf("Download and verifying %q", file.Filename)
+		dst = io.MultiWriter(outFile, hash)
+	} else {
+		log.Debug().Msgf("SHA missing for %q. Skipping validation", file.Filename)
+	}
+	if _, err = io.Copy(dst, resp.Body); err != nil {
+		return fmt.Errorf("failed to write file %q: %v", file.Filename, err)
+	}
+	if calculatedSHA := fmt.Sprintf("%x", hash.Sum(nil)); file.SHA256 != "" && calculatedSHA != file.SHA256 {
+		return fmt.Errorf("SHA mismatch for file %q ( calculated: %s != metadata: %s )", file.Filename, calculatedSHA, file.SHA256)
+	}
+	return nil
+}
+
+// calculateSHA streams the file at filePath through SHA256 and returns the digest as a hex string.
+func calculateSHA(filePath string) (string, error) {
+	f, err := os.Open(filePath)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+	digest := sha256.New()
+	_, err = io.Copy(digest, f)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("%x", digest.Sum(nil)), nil
+}
diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go
new file mode 100644
index 00000000..123948ad
--- /dev/null
+++ b/pkg/gallery/models_test.go
@@ -0,0 +1,30 @@
+package gallery_test
+
+import (
+	"os"
+	"path/filepath"
+
+	. "github.com/go-skynet/LocalAI/pkg/gallery"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("Model test", func() {
+	Context("Downloading", func() {
+		It("applies model correctly", func() {
+			tempdir, err := os.MkdirTemp("", "test")
+			Expect(err).ToNot(HaveOccurred())
+			defer os.RemoveAll(tempdir)
+			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) // FIXTURES: fixtures directory, taken from the environment
+			Expect(err).ToNot(HaveOccurred())
+
+			err = Apply(tempdir, "", c) // empty override: the name declared in the fixture is used
+			Expect(err).ToNot(HaveOccurred())
+			// the downloaded file, both prompt templates and the generated config must exist
+			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
+				_, err = os.Stat(filepath.Join(tempdir, f))
+				Expect(err).ToNot(HaveOccurred())
+			}
+		})
+	})
+})
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 74c05f29..b5e43a38 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -164,11 +164,12 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 }
 
 func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
-	log.Debug().Msgf("Loading models greedly")
+	log.Debug().Msgf("Loading model '%s' greedly", modelFile)
 
 	ml.mu.Lock()
 	m, exists := ml.models[modelFile]
 	if exists {
+		log.Debug().Msgf("Model '%s' already loaded", modelFile)
 		ml.mu.Unlock()
 		return m, nil
 	}
diff --git a/tests/fixtures/gallery_simple.yaml b/tests/fixtures/gallery_simple.yaml
new file mode 100644
index 00000000..058733fe
--- /dev/null
+++ b/tests/fixtures/gallery_simple.yaml
@@ -0,0 +1,40 @@
+name: "cerebras"
+description: |
+    cerebras
+license: "Apache 2.0"
+
+config_file: |
+    parameters:
+      model: cerebras
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+    context_size: 1024
+    stopwords:
+    - "HUMAN:"
+    - "GPT:"
+    roles:
+      user: ""
+      system: ""
+    template:
+      completion: "cerebras-completion"
+      chat: cerebras-chat
+
+files:
+    - filename: "cerebras"
+      sha256: "c947051ae4dba9530ca55d923a7a484acd65664c8633462c8ccd4bb7848f2c65"
+      uri: "https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerebras-111m-q4_2.bin"
+
+prompt_templates:
+    - name: "cerebras-completion"
+      content: |
+        Complete the prompt
+        ### Prompt:
+        {{.Input}}
+        ### Response:
+    - name: "cerebras-chat"
+      content: |
+        The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+        ### Prompt:
+        {{.Input}}
+        ### Response:
\ No newline at end of file
diff --git a/tests/fixtures/completion.tmpl b/tests/models_fixtures/completion.tmpl
similarity index 100%
rename from tests/fixtures/completion.tmpl
rename to tests/models_fixtures/completion.tmpl
diff --git a/tests/fixtures/config.yaml b/tests/models_fixtures/config.yaml
similarity index 100%
rename from tests/fixtures/config.yaml
rename to tests/models_fixtures/config.yaml
diff --git a/tests/fixtures/embeddings.yaml b/tests/models_fixtures/embeddings.yaml
similarity index 100%
rename from tests/fixtures/embeddings.yaml
rename to tests/models_fixtures/embeddings.yaml
diff --git a/tests/fixtures/ggml-gpt4all-j.tmpl b/tests/models_fixtures/ggml-gpt4all-j.tmpl
similarity index 100%
rename from tests/fixtures/ggml-gpt4all-j.tmpl
rename to tests/models_fixtures/ggml-gpt4all-j.tmpl
diff --git a/tests/fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml
similarity index 100%
rename from tests/fixtures/gpt4.yaml
rename to tests/models_fixtures/gpt4.yaml
diff --git a/tests/fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml
similarity index 100%
rename from tests/fixtures/gpt4_2.yaml
rename to tests/models_fixtures/gpt4_2.yaml
diff --git a/tests/fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml
similarity index 100%
rename from tests/fixtures/rwkv.yaml
rename to tests/models_fixtures/rwkv.yaml
diff --git a/tests/fixtures/rwkv_chat.tmpl b/tests/models_fixtures/rwkv_chat.tmpl
similarity index 100%
rename from tests/fixtures/rwkv_chat.tmpl
rename to tests/models_fixtures/rwkv_chat.tmpl
diff --git a/tests/fixtures/rwkv_completion.tmpl b/tests/models_fixtures/rwkv_completion.tmpl
similarity index 100%
rename from tests/fixtures/rwkv_completion.tmpl
rename to tests/models_fixtures/rwkv_completion.tmpl
diff --git a/tests/fixtures/whisper.yaml b/tests/models_fixtures/whisper.yaml
similarity index 100%
rename from tests/fixtures/whisper.yaml
rename to tests/models_fixtures/whisper.yaml

From 1a7587ee485f81fedb6616db8d6f982a27cdbd45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Prud=27homme?=
 <sebastien.prudhomme@gmail.com>
Date: Thu, 18 May 2023 19:18:32 +0200
Subject: [PATCH 02/32] feat: add an environment variable to manage rebuild in
 Docker image (#290)

Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 .env          | 1 +
 Dockerfile    | 3 ++-
 README.md     | 2 ++
 entrypoint.sh | 4 +++-
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.env b/.env
index b05dac68..c5248593 100644
--- a/.env
+++ b/.env
@@ -3,3 +3,4 @@
 MODELS_PATH=/models
 # DEBUG=true
 # BUILD_TYPE=generic
+# REBUILD=true
diff --git a/Dockerfile b/Dockerfile
index 52869bb9..27ab3800 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
 ARG GO_VERSION=1.20
 ARG BUILD_TYPE=
 FROM golang:$GO_VERSION
+ENV REBUILD=true
 WORKDIR /build
 RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 
 COPY . .
 RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
-RUN make prepare-sources
+RUN make build
 EXPOSE 8080
 ENTRYPOINT [ "/build/entrypoint.sh" ]
diff --git a/README.md b/README.md
index 3a4cbb95..4dab7d9c 100644
--- a/README.md
+++ b/README.md
@@ -464,6 +464,8 @@ You should see:
 └───────────────────────────────────────────────────┘ 
 ```
 
+Note: the binary inside the image is rebuilt at the start of the container to enable CPU optimizations for the execution environment. You can set the environment variable `REBUILD` to `false` to prevent this behavior.
+
 </details>
 
 ### Build locally
diff --git a/entrypoint.sh b/entrypoint.sh
index aab14205..e7390e56 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -2,6 +2,8 @@
 
 cd /build
 
-make build
+if [ "$REBUILD" != "false" ]; then
+	make rebuild
+fi
 
 ./local-ai "$@"
\ No newline at end of file

From 5a6d9d4e5b9dcea22f00f3acd5bd9af52ea93d62 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Thu, 18 May 2023 21:12:42 +0200
Subject: [PATCH 03/32] fix(deps): update
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 546600f (#276)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 6 +++---
 go.sum | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 1cd33ce8..960d799c 100644
--- a/go.mod
+++ b/go.mod
@@ -12,9 +12,10 @@ require (
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84
 	github.com/gofiber/fiber/v2 v2.45.0
+	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
-	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc
+	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878
 	github.com/onsi/ginkgo/v2 v2.9.5
 	github.com/onsi/gomega v1.27.6
 	github.com/otiai10/copy v1.11.0
@@ -24,6 +25,7 @@ require (
 	github.com/swaggo/swag v1.16.1
 	github.com/urfave/cli/v2 v2.25.3
 	github.com/valyala/fasthttp v1.47.0
+	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v3 v3.0.1
 )
 
@@ -43,7 +45,6 @@ require (
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
-	github.com/google/uuid v1.3.0 // indirect
 	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/klauspost/compress v1.16.3 // indirect
@@ -64,5 +65,4 @@ require (
 	golang.org/x/sys v0.8.0 // indirect
 	golang.org/x/text v0.9.0 // indirect
 	golang.org/x/tools v0.9.1 // indirect
-	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 09af76da..e96eb2c8 100644
--- a/go.sum
+++ b/go.sum
@@ -113,6 +113,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWb
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc h1:OPavP/SUsVWVYPhSUZKZeX8yDSQzf4G+BmUmwzrLTyI=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878 h1:3MFUW2a1Aqm2nMF5f+PNGq55cbxIzkRHQX/o7JVysAo=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
 github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
 github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=

From 549a01b62e94238ff08fa3da893581432d69ecf8 Mon Sep 17 00:00:00 2001
From: Tyler Gillson <tyler.gillson@gmail.com>
Date: Thu, 18 May 2023 13:50:21 -0700
Subject: [PATCH 04/32] docs: fix langchain-chroma example (#298)

Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
---
 .vscode/launch.json                              | 13 +++++++++++++
 examples/langchain-chroma/.env.example           |  5 +++++
 examples/langchain-chroma/.gitignore             |  4 ++++
 examples/langchain-chroma/README.md              | 11 +++++++++--
 examples/langchain-chroma/docker-compose.yml     | 15 +++++++++++++++
 examples/langchain-chroma/models/embeddings.yaml |  1 +
 examples/langchain-chroma/query.py               |  9 ++++++---
 examples/langchain-chroma/store.py               |  5 +----
 examples/query_data/docker-compose.yml           |  2 +-
 9 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 examples/langchain-chroma/.env.example
 create mode 100644 examples/langchain-chroma/.gitignore
 create mode 100644 examples/langchain-chroma/docker-compose.yml

diff --git a/.vscode/launch.json b/.vscode/launch.json
index e8d94825..b45837ff 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,6 +1,19 @@
 {
     "version": "0.2.0",
     "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "cwd": "${workspaceFolder}/examples/langchain-chroma",
+            "env": {
+                "OPENAI_API_BASE": "http://localhost:8080/v1",
+                "OPENAI_API_KEY": "abc"
+            }
+        },
         {
             "name": "Launch Go",
             "type": "go",
diff --git a/examples/langchain-chroma/.env.example b/examples/langchain-chroma/.env.example
new file mode 100644
index 00000000..37cda598
--- /dev/null
+++ b/examples/langchain-chroma/.env.example
@@ -0,0 +1,5 @@
+THREADS=4
+CONTEXT_SIZE=512
+MODELS_PATH=/models
+DEBUG=true
+# BUILD_TYPE=generic
\ No newline at end of file
diff --git a/examples/langchain-chroma/.gitignore b/examples/langchain-chroma/.gitignore
new file mode 100644
index 00000000..3dc19014
--- /dev/null
+++ b/examples/langchain-chroma/.gitignore
@@ -0,0 +1,4 @@
+db/
+state_of_the_union.txt
+models/bert
+models/ggml-gpt4all-j
\ No newline at end of file
diff --git a/examples/langchain-chroma/README.md b/examples/langchain-chroma/README.md
index 70e3f42b..17207a02 100644
--- a/examples/langchain-chroma/README.md
+++ b/examples/langchain-chroma/README.md
@@ -10,13 +10,20 @@ Download the models and start the API:
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 
-cd LocalAI/examples/query_data
+cd LocalAI/examples/langchain-chroma
 
 wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
 wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
 
+# configure your .env
+# NOTE: ensure that THREADS does not exceed your machine's CPU cores
+mv .env.example .env
+
 # start with docker-compose
 docker-compose up -d --build
+
+# tail the logs & wait until the build completes
+docker logs -f langchain-chroma-api-1
 ```
 
 ### Python requirements
@@ -37,7 +44,7 @@ wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_
 python store.py
 ```
 
-After it finishes, a directory "storage" will be created with the vector index database.
+After it finishes, a directory "db" will be created with the vector index database.
 
 ## Query
 
diff --git a/examples/langchain-chroma/docker-compose.yml b/examples/langchain-chroma/docker-compose.yml
new file mode 100644
index 00000000..96ef540e
--- /dev/null
+++ b/examples/langchain-chroma/docker-compose.yml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    env_file:
+      - ../../.env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai"]
diff --git a/examples/langchain-chroma/models/embeddings.yaml b/examples/langchain-chroma/models/embeddings.yaml
index 46a08502..536c8de1 100644
--- a/examples/langchain-chroma/models/embeddings.yaml
+++ b/examples/langchain-chroma/models/embeddings.yaml
@@ -1,5 +1,6 @@
 name: text-embedding-ada-002
 parameters:
   model: bert
+threads: 4
 backend: bert-embeddings
 embeddings: true
diff --git a/examples/langchain-chroma/query.py b/examples/langchain-chroma/query.py
index 2f7df507..33848818 100644
--- a/examples/langchain-chroma/query.py
+++ b/examples/langchain-chroma/query.py
@@ -2,8 +2,9 @@
 import os
 from langchain.vectorstores import Chroma
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.llms import OpenAI
-from langchain.chains import VectorDBQA
+from langchain.chat_models import ChatOpenAI
+from langchain.chains import RetrievalQA
+from langchain.vectorstores.base import VectorStoreRetriever
 
 base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 
@@ -12,8 +13,10 @@ embedding = OpenAIEmbeddings()
 persist_directory = 'db'
 
 # Now we can load the persisted database from disk, and use it as normal. 
+llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)
 vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
-qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
+retriever = VectorStoreRetriever(vectorstore=vectordb)
+qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
 
 query = "What the president said about taxes ?"
 print(qa.run(query))
diff --git a/examples/langchain-chroma/store.py b/examples/langchain-chroma/store.py
index 127bb240..b9cbad0e 100755
--- a/examples/langchain-chroma/store.py
+++ b/examples/langchain-chroma/store.py
@@ -2,9 +2,7 @@
 import os
 from langchain.vectorstores import Chroma
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
-from langchain.llms import OpenAI
-from langchain.chains import VectorDBQA
+from langchain.text_splitter import CharacterTextSplitter
 from langchain.document_loaders import TextLoader
 
 base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
@@ -14,7 +12,6 @@ loader = TextLoader('state_of_the_union.txt')
 documents = loader.load()
 
 text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
-#text_splitter = TokenTextSplitter()
 texts = text_splitter.split_documents(documents)
 
 # Embed and store the texts
diff --git a/examples/query_data/docker-compose.yml b/examples/query_data/docker-compose.yml
index a59edfc4..cf76eb7f 100644
--- a/examples/query_data/docker-compose.yml
+++ b/examples/query_data/docker-compose.yml
@@ -4,7 +4,7 @@ services:
   api:
     image: quay.io/go-skynet/local-ai:latest
     build:
-      context: .
+      context: ../../
       dockerfile: Dockerfile
     ports:
       - 8080:8080

From 9d3c5ead93e49f9a3670386ef6714427de23e3c2 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Thu, 18 May 2023 22:50:45 +0200
Subject: [PATCH 05/32] fix(deps): update github.com/go-skynet/go-llama.cpp
 digest to 33f8c2d (#293)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 4 ++--
 go.sum | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/go.mod b/go.mod
index 960d799c..c62ffed9 100644
--- a/go.mod
+++ b/go.mod
@@ -10,14 +10,14 @@ require (
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf
 	github.com/gofiber/fiber/v2 v2.45.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
 	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878
 	github.com/onsi/ginkgo/v2 v2.9.5
-	github.com/onsi/gomega v1.27.6
+	github.com/onsi/gomega v1.27.7
 	github.com/otiai10/copy v1.11.0
 	github.com/otiai10/openaigo v1.1.0
 	github.com/rs/zerolog v1.29.1
diff --git a/go.sum b/go.sum
index e96eb2c8..9fed8775 100644
--- a/go.sum
+++ b/go.sum
@@ -66,6 +66,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8f
 github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84 h1:f5iYF75bAr73Tl8AdtFD5Urs/2bsHKPh52K++jLbsfk=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84/go.mod h1:jxyQ26t1aKC5Gn782w9WWh5n1133PxCOfkuc01xM4RQ=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf h1:D9CLQwr1eqSnV0DM7YGOKhSfNajj2qOA7XAD6+p1/HI=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -121,6 +123,8 @@ github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
 github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
 github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
 github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
+github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=
+github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4=
 github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
 github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
 github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=

From fc59f7484949d075ee327f5a71bd4b98164e7f88 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Thu, 18 May 2023 22:51:02 +0200
Subject: [PATCH 06/32] fix(deps): update
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 94f4018 (#294)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index c62ffed9..d5d97f6e 100644
--- a/go.mod
+++ b/go.mod
@@ -15,7 +15,7 @@ require (
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
-	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878
+	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042
 	github.com/onsi/ginkgo/v2 v2.9.5
 	github.com/onsi/gomega v1.27.7
 	github.com/otiai10/copy v1.11.0
diff --git a/go.sum b/go.sum
index 9fed8775..f854c399 100644
--- a/go.sum
+++ b/go.sum
@@ -117,6 +117,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d624
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878 h1:3MFUW2a1Aqm2nMF5f+PNGq55cbxIzkRHQX/o7JVysAo=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042 h1:bMU75tTgyw6mYDa5NbOHlZ1KYqQgz7heQFfTwuvCGww=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
 github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
 github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=

From 207ce81e4af83db28ad09f1e2f68ef8304e0c628 Mon Sep 17 00:00:00 2001
From: Tyler Gillson <tyler.gillson@gmail.com>
Date: Thu, 18 May 2023 16:08:20 -0700
Subject: [PATCH 07/32] fix: Dockerfile.build missing cmake in rwkv example
 (#301)

Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
---
 examples/rwkv/.gitignore       | 2 ++
 examples/rwkv/Dockerfile.build | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 examples/rwkv/.gitignore

diff --git a/examples/rwkv/.gitignore b/examples/rwkv/.gitignore
new file mode 100644
index 00000000..ab3629c5
--- /dev/null
+++ b/examples/rwkv/.gitignore
@@ -0,0 +1,2 @@
+models/rwkv
+models/rwkv.tokenizer.json
\ No newline at end of file
diff --git a/examples/rwkv/Dockerfile.build b/examples/rwkv/Dockerfile.build
index c62024de..491f9ccd 100644
--- a/examples/rwkv/Dockerfile.build
+++ b/examples/rwkv/Dockerfile.build
@@ -1,5 +1,7 @@
 FROM python
 
+RUN apt-get update && apt-get -y install cmake
+
 # convert the model (one-off)
 RUN pip3 install torch numpy
 

From 1fade53a61ab62455e28d675cc961579c58f1937 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 19 May 2023 08:31:11 +0200
Subject: [PATCH 08/32] feat: minor enhancements to /models/apply (#297)

---
 .github/workflows/image.yml |  4 ++++
 .github/workflows/test.yml  |  4 ++++
 api/gallery.go              |  7 +++++--
 pkg/gallery/models.go       | 31 ++++++++++++++++++++++++++++---
 pkg/gallery/models_test.go  | 25 +++++++++++++++++++++++++
 5 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index d83f58d3..eeada322 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -9,6 +9,10 @@ on:
     tags:
       - '*'
 
+concurrency:
+  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
 jobs:
   docker:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 17e3c809..48fe2cf8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,6 +9,10 @@ on:
     tags:
       - '*'
 
+concurrency:
+  group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
 jobs:
   ubuntu-latest:
     runs-on: ubuntu-latest
diff --git a/api/gallery.go b/api/gallery.go
index 5378c7bc..cb165f84 100644
--- a/api/gallery.go
+++ b/api/gallery.go
@@ -86,6 +86,8 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 					continue
 				}
 
+				config.Files = append(config.Files, op.req.AdditionalFiles...)
+
 				if err := gallery.Apply(g.modelPath, op.req.Name, &config); err != nil {
 					updateError(err)
 					continue
@@ -106,8 +108,9 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 // endpoints
 
 type ApplyGalleryModelRequest struct {
-	URL  string `json:"url"`
-	Name string `json:"name"`
+	URL             string         `json:"url"`
+	Name            string         `json:"name"`
+	AdditionalFiles []gallery.File `json:"files"`
 }
 
 func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
index bd9e1371..8c61380c 100644
--- a/pkg/gallery/models.go
+++ b/pkg/gallery/models.go
@@ -50,9 +50,9 @@ type Config struct {
 }
 
 type File struct {
-	Filename string `yaml:"filename"`
-	SHA256   string `yaml:"sha256"`
-	URI      string `yaml:"uri"`
+	Filename string `yaml:"filename" json:"filename"`
+	SHA256   string `yaml:"sha256" json:"sha256"`
+	URI      string `yaml:"uri" json:"uri"`
 }
 
 type PromptTemplate struct {
@@ -77,6 +77,21 @@ func ReadConfigFile(filePath string) (*Config, error) {
 	return &config, nil
 }
 
+func inTrustedRoot(path string, trustedRoot string) error {
+	for path != "/" {
+		path = filepath.Dir(path)
+		if path == trustedRoot {
+			return nil
+		}
+	}
+	return fmt.Errorf("path is outside of trusted root")
+}
+
+func verifyPath(path, basePath string) error {
+	c := filepath.Clean(filepath.Join(basePath, path))
+	return inTrustedRoot(c, basePath)
+}
+
 func Apply(basePath, nameOverride string, config *Config) error {
 	// Create base path if it doesn't exist
 	err := os.MkdirAll(basePath, 0755)
@@ -88,6 +103,9 @@ func Apply(basePath, nameOverride string, config *Config) error {
 	for _, file := range config.Files {
 		log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
 
+		if err := verifyPath(file.Filename, basePath); err != nil {
+			return err
+		}
 		// Create file path
 		filePath := filepath.Join(basePath, file.Filename)
 
@@ -173,6 +191,9 @@ func Apply(basePath, nameOverride string, config *Config) error {
 
 	// Write prompt template contents to separate files
 	for _, template := range config.PromptTemplates {
+		if err := verifyPath(template.Name+".tmpl", basePath); err != nil {
+			return err
+		}
 		// Create file path
 		filePath := filepath.Join(basePath, template.Name+".tmpl")
 
@@ -195,6 +216,10 @@ func Apply(basePath, nameOverride string, config *Config) error {
 		name = nameOverride
 	}
 
+	if err := verifyPath(name+".yaml", basePath); err != nil {
+		return err
+	}
+
 	configFilePath := filepath.Join(basePath, name+".yaml")
 
 	// Read and update config file as map[string]interface{}
diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go
index 123948ad..980b3a96 100644
--- a/pkg/gallery/models_test.go
+++ b/pkg/gallery/models_test.go
@@ -26,5 +26,30 @@ var _ = Describe("Model test", func() {
 				Expect(err).ToNot(HaveOccurred())
 			}
 		})
+		It("renames model correctly", func() {
+			tempdir, err := os.MkdirTemp("", "test")
+			Expect(err).ToNot(HaveOccurred())
+			defer os.RemoveAll(tempdir)
+			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
+			Expect(err).ToNot(HaveOccurred())
+
+			err = Apply(tempdir, "foo", c)
+			Expect(err).ToNot(HaveOccurred())
+
+			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
+				_, err = os.Stat(filepath.Join(tempdir, f))
+				Expect(err).ToNot(HaveOccurred())
+			}
+		})
+		It("catches path traversals", func() {
+			tempdir, err := os.MkdirTemp("", "test")
+			Expect(err).ToNot(HaveOccurred())
+			defer os.RemoveAll(tempdir)
+			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
+			Expect(err).ToNot(HaveOccurred())
+
+			err = Apply(tempdir, "../../../foo", c)
+			Expect(err).To(HaveOccurred())
+		})
 	})
 })

From cadce540f96e5a4a1b178861ce6d25862b5f7e70 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Fri, 19 May 2023 10:00:07 +0200
Subject: [PATCH 09/32] fix(deps): update
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 914519e (#302)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index d5d97f6e..5efe1f4e 100644
--- a/go.mod
+++ b/go.mod
@@ -15,7 +15,7 @@ require (
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
-	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042
+	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd
 	github.com/onsi/ginkgo/v2 v2.9.5
 	github.com/onsi/gomega v1.27.7
 	github.com/otiai10/copy v1.11.0
diff --git a/go.sum b/go.sum
index f854c399..b188c1e0 100644
--- a/go.sum
+++ b/go.sum
@@ -119,6 +119,8 @@ github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042 h1:bMU75tTgyw6mYDa5NbOHlZ1KYqQgz7heQFfTwuvCGww=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
 github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
 github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=

From 837ce2cb313bfe712761bc2fc38ed7bba77589bc Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Fri, 19 May 2023 10:37:12 +0200
Subject: [PATCH 10/32] :arrow_up: Update nomic-ai/gpt4all (#295)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ea750230..bee2a91d 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 GOLLAMA_VERSION?=b7bbefbe0b84262e003387a605842bdd0d099300
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
-GPT4ALL_VERSION?=bce2b3025b360af73091da0128b1e91f9bc94f9f
+GPT4ALL_VERSION?=213e033540fa3b68202bb12cf7f0134cfe6638aa
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47

From aa7a18f131547537ad0df3808aec895e0e438c22 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 19 May 2023 11:46:53 +0200
Subject: [PATCH 11/32] github: add ISSUE_TEMPLATE (#307)

---
 .github/ISSUE_TEMPLATE/bug_report.md      | 31 +++++++++++++++++++++++
 .github/ISSUE_TEMPLATE/config.yml         |  8 ++++++
 .github/ISSUE_TEMPLATE/feature_request.md | 22 ++++++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..a7f77221
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,31 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: bug
+assignees: mudler
+
+---
+
+<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
+
+**LocalAI version:**
+<!-- Container Image or LocalAI tag/commit -->
+
+**Environment, CPU architecture, OS, and Version:**
+<!-- Provide the output of "uname -a", your hardware specs, and whether it's a VM -->
+
+**Describe the bug**
+<!-- A clear and concise description of what the bug is. -->
+
+**To Reproduce**
+<!-- Steps to reproduce the behavior, including the LocalAI command used, if any -->
+
+**Expected behavior**
+<!-- A clear and concise description of what you expected to happen. -->
+
+**Logs**
+<!-- If applicable, add logs while running LocalAI in debug mode (`--debug` or `DEBUG=true`) to help explain your problem.  -->
+
+**Additional context**
+<!-- Add any other context about the problem here. -->
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..acc65c80
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Community Support
+    url: https://github.com/go-skynet/LocalAI/discussions
+    about: Please ask and answer questions here.
+  - name: Discord
+    url: https://discord.gg/uJAeKSAGDy
+    about: Join our community on Discord!
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 00000000..c184aae9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: enhancement
+assignees: mudler
+
+---
+
+<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
+
+**Is your feature request related to a problem? Please describe.**
+<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]  -->
+
+**Describe the solution you'd like**
+<!-- A clear and concise description of what you want to happen.  -->
+
+**Describe alternatives you've considered**
+<!-- A clear and concise description of any alternative solutions or features you've considered.  -->
+
+**Additional context**
+<!-- Add any other context or screenshots about the feature request here. -->

From 19deea986a6488a5330274bef6a3c3df399a6e4d Mon Sep 17 00:00:00 2001
From: Aisuko <urakiny@gmail.com>
Date: Sat, 20 May 2023 00:39:48 +1000
Subject: [PATCH 12/32] fix: missing model path in launch.json (#309)

Signed-off-by: Aisuko <urakiny@gmail.com>
---
 .vscode/launch.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index b45837ff..cf4fb924 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -15,7 +15,7 @@
             }
         },
         {
-            "name": "Launch Go",
+            "name": "Launch LocalAI API",
             "type": "go",
             "request": "launch",
             "mode": "debug",
@@ -24,8 +24,8 @@
                 "api"
             ],
             "env": {
-                "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
-                "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+                "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
                 "DEBUG": "true"
             }
         }

From bf3d936aea5b22619e86fab423d0f7170abf4206 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 19 May 2023 17:11:28 +0200
Subject: [PATCH 13/32] fix: add LLAMA_CUBLAS on BUILD_TYPE=cublas (#310)

---
 Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index bee2a91d..35ab79a9 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,6 @@ BUILD_TYPE?=
 CGO_LDFLAGS?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
-
 GO_TAGS?=
 
 OPTIONAL_TARGETS?=
@@ -36,9 +35,9 @@ endif
 
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
+	export LLAMA_CUBLAS=1
 endif
 
-
 ifeq ($(GO_TAGS),stablediffusion)
 	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
 endif

From 2e64ed625562feabd08bd4ef7dfe7926302b7f78 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 19 May 2023 19:33:53 +0200
Subject: [PATCH 14/32] docs: Update README (#311)

---
 README.md | 178 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 152 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 4dab7d9c..f91eb69d 100644
--- a/README.md
+++ b/README.md
@@ -9,26 +9,32 @@
 
 [![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) 
 
-**LocalAI** is a drop-in replacement REST API compatible with OpenAI API specifications for local inferencing. It allows to run models locally or on-prem with consumer grade hardware, supporting multiple models families compatible with the `ggml` format. For a list of the supported model families, see [the model compatibility table below](https://github.com/go-skynet/LocalAI#model-compatibility-table).
+**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run models locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format.
+
+For a list of the supported model families, please see [the model compatibility table below](https://github.com/go-skynet/LocalAI#model-compatibility-table).
+
+In a nutshell:
 
 - Local, OpenAI drop-in alternative REST API. You own your data.
+- NO GPU required. NO Internet access is required either. Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. [See building instructions](https://github.com/go-skynet/LocalAI#cublas).
 - Supports multiple models, Audio transcription, Text generation with GPTs, Image generation with stable diffusion (experimental)
 - Once loaded the first time, it keep models loaded in memory for faster inference
-- Support for prompt templates
 - Doesn't shell-out, but uses C++ bindings for a faster inference and better performance. 
-- NO GPU required. NO Internet access is required either. Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. [See building instructions](https://github.com/go-skynet/LocalAI#cublas).
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
-LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding.
+
 
 See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
 
-
 ### How does it work?  
 
 <details>
-  
+
+LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU.
+
+LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding and [Stable-Diffusion-NCNN](https://github.com/EdVince/Stable-Diffusion-NCNN) for image generation. See [the model compatibility table](https://github.com/go-skynet/LocalAI#model-compatibility-table) to learn about all the components of LocalAI.
+
 ![LocalAI](https://github.com/go-skynet/LocalAI/assets/2420543/38de3a9b-3866-48cd-9234-662f9571064a)
 
 </details>
@@ -117,31 +123,31 @@ Depending on the model you are attempting to run might need more RAM or CPU reso
 
 <details>
 
-| Backend         | Compatible models     | Completion/Chat endpoint | Audio transcription/Image | Embeddings support                | Token stream support | Github                                     | Bindings                                  |
-|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
-| llama           | Vicuna, Alpaca, LLaMa | yes                      | no                  | yes (doesn't seem to be accurate) | yes                  | https://github.com/ggerganov/llama.cpp     | https://github.com/go-skynet/go-llama.cpp |
-| gpt4all-llama   | Vicuna, Alpaca, LLaMa | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
-| gpt4all-mpt     | MPT                   | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
-| gpt4all-j       | GPT4ALL-J             | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
-| gpt2            | GPT/NeoX, Cerebras    | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| dolly           | Dolly                 | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| redpajama       | RedPajama             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| stableLM        | StableLM GPT/NeoX     | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| replit       | Replit             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| gptneox       | GPT NeoX             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| starcoder       | Starcoder             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
-| bloomz          | Bloom                 | yes                      | no                  | no                                | no                   | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp   |
-| rwkv            | RWKV                  | yes                      | no                  | no                                | yes                  | https://github.com/saharNooby/rwkv.cpp     | https://github.com/donomii/go-rwkv.cpp    |
-| bert-embeddings | bert                  | no                       | no                  | yes                               | no                   | https://github.com/skeskinen/bert.cpp      | https://github.com/go-skynet/go-bert.cpp  |
-| whisper         | whisper               | no                       | Audio                 | no                                | no                   | https://github.com/ggerganov/whisper.cpp   | https://github.com/ggerganov/whisper.cpp  |
-| stablediffusion         | stablediffusion               | no                       | Image                 | no                                | no                   | https://github.com/EdVince/Stable-Diffusion-NCNN   | https://github.com/mudler/go-stable-diffusion  |
+| Backend and Bindings                                                             | Compatible models     | Completion/Chat endpoint | Audio transcription/Image | Embeddings support                | Token stream support |
+|----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|
+| [llama](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp))         | Vicuna, Alpaca, LLaMa | yes                      | no                        | yes (doesn't seem to be accurate) | yes                  |
+| [gpt4all-llama](https://github.com/nomic-ai/gpt4all)      | Vicuna, Alpaca, LLaMa | yes                      | no                        | no                                | yes                  |
+| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all)          | MPT                   | yes                      | no                        | no                                | yes                  |
+| [gpt4all-j](https://github.com/nomic-ai/gpt4all)           | GPT4ALL-J             | yes                      | no                        | no                                | yes                  |
+| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))             | GPT/NeoX, Cerebras    | yes                      | no                        | no                                | no                   |
+| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))            | Dolly                 | yes                      | no                        | no                                | no                   |
+| [redpajama](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))        | RedPajama             | yes                      | no                        | no                                | no                   |
+| [stableLM](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))         | StableLM GPT/NeoX     | yes                      | no                        | no                                | no                   |
+| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))        | Replit             | yes                      | no                        | no                                | no                   |
+| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))        | GPT NeoX             | yes                      | no                        | no                                | no                   |
+| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp))        | Starcoder             | yes                      | no                        | no                                | no                   |
+| [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp))       | Bloom                 | yes                      | no                        | no                                | no                   |
+| [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rwkv.cpp))       | rwkv                 | yes                      | no                        | no                                | yes                   |
+| [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp)) | bert                  | no                       | no                  | yes                               | no                   |    
+| [whisper](https://github.com/ggerganov/whisper.cpp)         | whisper               | no                       | Audio                 | no                                | no                   |  
+| [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion))        | stablediffusion               | no                       | Image                 | no                                | no                   | 
 </details>
 
 ## Usage
 
 > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
 
-The easiest way to run LocalAI is by using `docker-compose`:
+The easiest way to run LocalAI is by using `docker-compose` (to build locally, see [building LocalAI](https://github.com/go-skynet/LocalAI/tree/master#setup)):
 
 ```bash
 
@@ -214,7 +220,25 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```
 </details>
 
-To build locally, run `make build` (see below).
+### Advanced: prepare models using the API
+
+Instead of installing models manually, you can use the LocalAI API endpoints and a model definition to install models programmatically at runtime.
+
+<details>
+
+A curated collection of model files is in the [model-gallery](https://github.com/go-skynet/model-gallery) (work in progress!).
+
+To install, for example, `gpt4all-j`, send a POST request to the `/models/apply` endpoint with the model definition URL (`url`) and the name the model should have in LocalAI (`name`, optional):
+
+```
+curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{
+     "url": "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml",
+     "name": "gpt4all-j"
+   }'  
+```
+
+</details>
+
 
 ### Other examples
 
@@ -823,6 +847,108 @@ models
 
 </details>
 
+## LocalAI API endpoints
+
+Besides the OpenAI endpoints, there are additional LocalAI-only API endpoints.
+
+### Applying a model - `/models/apply`
+
+This endpoint can be used to install a model at runtime. 
+
+<details>
+
+LocalAI will create a batch process that downloads the required files from a model definition and automatically reload itself to include the new model. 
+
+Input: `url`, `name` (optional), `files` (optional)
+
+```bash
+curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{
+     "url": "<MODEL_DEFINITION_URL>",
+     "name": "<MODEL_NAME>",
+     "files": [
+        {
+            "uri": "<additional_file>",
+            "sha256": "<additional_file_hash>",
+            "name": "<additional_file_name>"
+        }
+     ]
+   }'
+```
+
+An optional list of additional files to download can be specified. The `name` field overrides the model name.
+
+Returns a `uid` and a `status` URL that can be used to follow the state of the process:
+
+```json
+{ "uid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"}
+```
+
+For an example collection of curated model definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery).
+
+</details>
+
+### Inquiry model job state `/models/jobs/<uid>`
+
+This endpoint returns the state of the batch job associated with a model.
+<details>
+
+This endpoint can be used with the uuid returned by `/models/apply` to check a job state:
+
+```bash
+curl http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58
+```
+
+Returns a JSON object containing the error (if any), whether the job has been processed, and a status message:
+
+```json
+{"error":null,"processed":true,"message":"completed"}
+```
+
+</details>
+
+## Clients
+
+OpenAI clients are already compatible with LocalAI by overriding the basePath, or the target URL.
+
+## Javascript
+
+<details> 
+
+https://github.com/openai/openai-node/
+
+```javascript
+import { Configuration, OpenAIApi } from 'openai';
+
+const configuration = new Configuration({
+  basePath: `http://localhost:8080/v1`
+});
+const openai = new OpenAIApi(configuration);
+```
+
+</details>
+
+## Python
+
+<details>
+
+https://github.com/openai/openai-python
+
+Set the `OPENAI_API_BASE` environment variable, or set the base URL in code:
+
+```python
+import openai
+
+openai.api_base = "http://localhost:8080/v1"
+
+# create a chat completion
+chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}])
+
+# print the completion
+print(chat_completion.choices[0].message.content)
+```
+
+</details>
+
 ## Frequently asked questions
 
 Here are answers to some of the most common questions.

From b425954b9e8090a81d48006562428e3ea4881ac5 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 19 May 2023 19:42:40 +0200
Subject: [PATCH 15/32] docs: Update README

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f91eb69d..007802a0 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,7 @@ In a nutshell:
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
-
-
-See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
+See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI.
 
 ### How does it work?  
 
@@ -41,6 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https
 
 ## News
 
+- 19-05-2023: __v1.13.0__ released! 🔥🔥 updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminary support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api).
 - 17-05-2023:  __v1.12.0__ released! 🔥🔥 Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272).
 - 16-05-2023: 🔥🔥🔥 Experimental support for CUDA (https://github.com/go-skynet/LocalAI/pull/258) in the `llama.cpp` backend and Stable diffusion CPU image generation (https://github.com/go-skynet/LocalAI/pull/272) in `master`.
 

From 91fc52bfb78cf327ec38aa2cee032f7adeb5ec1f Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sat, 20 May 2023 00:27:13 +0200
Subject: [PATCH 16/32] :arrow_up: Update go-skynet/go-llama.cpp (#296)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 35ab79a9..81939fe0 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=b7bbefbe0b84262e003387a605842bdd0d099300
+GOLLAMA_VERSION?=3ee537e8cb52cf8334832a07fe753c756130e949
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=213e033540fa3b68202bb12cf7f0134cfe6638aa
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827

From 465a3b755de0fbdcd1f88cfedff01dd16a794b9d Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sat, 20 May 2023 00:30:36 +0200
Subject: [PATCH 17/32] :arrow_up: Update nomic-ai/gpt4all (#312)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 81939fe0..87337d64 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 GOLLAMA_VERSION?=3ee537e8cb52cf8334832a07fe753c756130e949
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
-GPT4ALL_VERSION?=213e033540fa3b68202bb12cf7f0134cfe6638aa
+GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47

From ffaf3b1d362cc4d0045e1ed60bbcf0481d21f8ef Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 00:30:49 +0200
Subject: [PATCH 18/32] fix(deps): update github.com/go-skynet/go-llama.cpp
 digest to 3ee537e (#313)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 5efe1f4e..83095d25 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,7 @@ require (
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52
 	github.com/gofiber/fiber/v2 v2.45.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
diff --git a/go.sum b/go.sum
index b188c1e0..752ade5f 100644
--- a/go.sum
+++ b/go.sum
@@ -68,6 +68,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84 h1:f5iYF75b
 github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84/go.mod h1:jxyQ26t1aKC5Gn782w9WWh5n1133PxCOfkuc01xM4RQ=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf h1:D9CLQwr1eqSnV0DM7YGOKhSfNajj2qOA7XAD6+p1/HI=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52 h1:VU64ntI6BuWRMFMzx8OTH26k5BJqY8vgW0Igyn9MYKc=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=

From 4e381cbe927711bfcb89e5172c3431e68ec1b0ac Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sat, 20 May 2023 09:06:30 +0200
Subject: [PATCH 19/32] feat: support shorter urls for github repositories
 (#314)

---
 Makefile            |  1 +
 api/gallery.go      | 48 ++++++++++++++++++++++++++++++++++++++++++++-
 api/gallery_test.go | 30 ++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 api/gallery_test.go

diff --git a/Makefile b/Makefile
index 87337d64..263fe413 100644
--- a/Makefile
+++ b/Makefile
@@ -65,6 +65,7 @@ gpt4all:
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/regex_escape/gpt4allregex_escape/g' {} +
 	mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
 
 ## BERT embeddings
diff --git a/api/gallery.go b/api/gallery.go
index cb165f84..fac48622 100644
--- a/api/gallery.go
+++ b/api/gallery.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
+	"net/url"
+	"strings"
 	"sync"
 
 	"github.com/go-skynet/LocalAI/pkg/gallery"
@@ -63,8 +65,15 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 				updateError := func(e error) {
 					g.updatestatus(op.id, &galleryOpStatus{Error: e, Processed: true})
 				}
+
+				url, err := op.req.DecodeURL()
+				if err != nil {
+					updateError(err)
+					continue
+				}
+
 				// Send a GET request to the URL
-				response, err := http.Get(op.req.URL)
+				response, err := http.Get(url)
 				if err != nil {
 					updateError(err)
 					continue
@@ -113,6 +122,43 @@ type ApplyGalleryModelRequest struct {
 	AdditionalFiles []gallery.File `json:"files"`
 }
 
+const (
+	githubURI = "github:" // prefix DecodeURL checks to detect the short "github:..." URL form
+)
+
+// DecodeURL resolves request.URL into a fetchable URL: "github:org/repo/path[@branch]"
+// becomes a raw.githubusercontent.com URL (branch defaults to "main"), http(s) URLs
+// are parsed and returned, and any other input is rejected with an error.
+func (request ApplyGalleryModelRequest) DecodeURL() (string, error) {
+	input := request.URL
+
+	if strings.HasPrefix(input, githubURI) {
+		// Trim only the scheme so that a ':' later in the path is preserved.
+		repoParts := strings.Split(strings.TrimPrefix(input, githubURI), "@")
+		branch := "main"
+		if len(repoParts) > 1 {
+			branch = repoParts[1]
+		}
+
+		// org, repository and at least one path element are required; without
+		// this check a short input such as "github:org" panics on indexing.
+		repoPath := strings.Split(repoParts[0], "/")
+		if len(repoPath) < 3 {
+			return "", fmt.Errorf("invalid github URL %q: expected github:org/repo/path[@branch]", input)
+		}
+		org, project, projectPath := repoPath[0], repoPath[1], strings.Join(repoPath[2:], "/")
+		return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath), nil
+	}
+	if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") {
+		u, err := url.Parse(input)
+		if err != nil {
+			return "", fmt.Errorf("invalid URL: %w", err)
+		}
+		return u.String(), nil
+	}
+	return "", fmt.Errorf("invalid URL format")
+}
+
 func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 
diff --git a/api/gallery_test.go b/api/gallery_test.go
new file mode 100644
index 00000000..1c92c0d5
--- /dev/null
+++ b/api/gallery_test.go
@@ -0,0 +1,30 @@
+package api_test
+
+import (
+	. "github.com/go-skynet/LocalAI/api"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("Gallery API tests", func() { // exercises ApplyGalleryModelRequest.DecodeURL
+	Context("requests", func() {
+		It("parses github with a branch", func() { // explicit "@main" suffix names the branch
+			req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
+			str, err := req.DecodeURL()
+			Expect(err).ToNot(HaveOccurred())
+			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
+		})
+		It("parses github without a branch", func() { // no "@branch" suffix: expected URL uses "main"
+			req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml"}
+			str, err := req.DecodeURL()
+			Expect(err).ToNot(HaveOccurred())
+			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
+		})
+		It("parses URLS", func() { // a plain https URL is expected back unchanged
+			req := ApplyGalleryModelRequest{URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"}
+			str, err := req.DecodeURL()
+			Expect(err).ToNot(HaveOccurred())
+			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
+		})
+	})
+})

From d0c033d09b1614ba6407b560d6f8661056220ef2 Mon Sep 17 00:00:00 2001
From: Aisuko <urakiny@gmail.com>
Date: Sat, 20 May 2023 17:10:20 +1000
Subject: [PATCH 20/32] feat: add PR template and stale configuration (#316)

Signed-off-by: Aisuko <urakiny@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 .github/PULL_REQUEST_TEMPLATE.md | 23 +++++++++++++++++++++++
 .github/stale.yml                | 18 ++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md
 create mode 100644 .github/stale.yml

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000..2318ad47
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,23 @@
+**Description**
+
+This PR fixes #
+
+**Notes for Reviewers**
+
+
+**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
+- [ ] Yes, I signed my commits.
+ 
+
+<!--
+Thank you for contributing to LocalAI! 
+
+Contributing Conventions:
+
+1. Include descriptive PR titles with [<component-name>] prepended.
+2. Build and test your changes before submitting a PR. 
+3. Sign your commits
+
+By following the community's contribution conventions upfront, the review process will 
+be accelerated and your PR merged more quickly.
+-->
\ No newline at end of file
diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 00000000..af48bade
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,18 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 45
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 10
+# Issues with these labels will never be considered stale
+exemptLabels:
+  - issue/willfix
+# Label to use when marking an issue as stale
+staleLabel: issue/stale
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+  This issue has been automatically marked as stale because it has not had
+  recent activity. It will be closed if no further activity occurs. Thank you
+  for your contributions.
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: >
+  This issue is being automatically closed due to inactivity.
+  However, you may choose to reopen this issue.
\ No newline at end of file

From 9609e4392bc4369637e7be74e77246e162d65cc9 Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sat, 20 May 2023 10:53:22 +0200
Subject: [PATCH 21/32] :arrow_up: Update go-skynet/go-llama.cpp (#321)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 263fe413..d4103630 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=3ee537e8cb52cf8334832a07fe753c756130e949
+GOLLAMA_VERSION?=a298043ef5f1e7039ae4f193027eab4bb88312b6
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827

From 5b22704799d05397774efdf1ed9269bbf7955dae Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 10:55:22 +0200
Subject: [PATCH 22/32] fix(deps): update github.com/go-skynet/go-llama.cpp
 digest to a298043 (#322)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 83095d25..49925087 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,7 @@ require (
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1
 	github.com/gofiber/fiber/v2 v2.45.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
diff --git a/go.sum b/go.sum
index 752ade5f..ba53edc3 100644
--- a/go.sum
+++ b/go.sum
@@ -70,6 +70,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf h1:D9CLQwr1
 github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52 h1:VU64ntI6BuWRMFMzx8OTH26k5BJqY8vgW0Igyn9MYKc=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=

From 7bc08797f9693bec47b9c306d7ac719c8435de19 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 22:50:17 +1000
Subject: [PATCH 23/32] fix(deps): update module github.com/gofiber/fiber/v2 to
 v2.46.0 (#308)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 49925087..e26e067a 100644
--- a/go.mod
+++ b/go.mod
@@ -11,7 +11,7 @@ require (
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1
-	github.com/gofiber/fiber/v2 v2.45.0
+	github.com/gofiber/fiber/v2 v2.46.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
diff --git a/go.sum b/go.sum
index ba53edc3..f70fe7dc 100644
--- a/go.sum
+++ b/go.sum
@@ -77,6 +77,8 @@ github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
 github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
+github.com/gofiber/fiber/v2 v2.46.0 h1:wkkWotblsGVlLjXj2dpgKQAYHtXumsK/HyFugQM68Ns=
+github.com/gofiber/fiber/v2 v2.46.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=

From 05a3d569b00e9661c45df45b3d0e69dc6dcf5a2e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Sat, 20 May 2023 17:03:53 +0200
Subject: [PATCH 24/32] feat: allow to override model config (#323)

---
 .env                                |  28 ++++-
 README.md                           |   7 +-
 api/api.go                          |   2 +-
 api/api_test.go                     | 169 ++++++++++++++++++++++++++++
 api/gallery.go                      |  13 ++-
 examples/langchain-chroma/README.md |   2 +
 examples/langchain-python/README.md |   1 +
 examples/query_data/README.md       |   2 +
 go.mod                              |   4 +-
 go.sum                              |  49 +-------
 pkg/gallery/models.go               |  11 +-
 pkg/gallery/models_test.go          |  45 +++++++-
 12 files changed, 269 insertions(+), 64 deletions(-)

diff --git a/.env b/.env
index c5248593..73e3174d 100644
--- a/.env
+++ b/.env
@@ -1,6 +1,30 @@
+## Set number of threads.
+## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
 # THREADS=14
+
+## Specify a different bind address (defaults to ":8080")
+# ADDRESS=127.0.0.1:8080
+
+## Default models context size
 # CONTEXT_SIZE=512
+
+## Default path for models
 MODELS_PATH=/models
+
+## Enable debug mode
 # DEBUG=true
-# BUILD_TYPE=generic
-# REBUILD=true
+
+## Specify a build type. Available: cublas, openblas.
+# BUILD_TYPE=openblas
+
+## Uncomment and set to false to disable rebuilding from source
+# REBUILD=false
+
+## Enable image generation with stablediffusion (requires REBUILD=true)
+# GO_TAGS=stablediffusion
+
+## Path where to store generated images
+# IMAGE_PATH=/tmp
+
+## Specify a default upload limit in MB (whisper)
+# UPLOAD_LIMIT
\ No newline at end of file
diff --git a/README.md b/README.md
index 007802a0..e71a56de 100644
--- a/README.md
+++ b/README.md
@@ -869,17 +869,18 @@ curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '
             "uri": "<additional_file>",
             "sha256": "<additional_file_hash>",
             "name": "<additional_file_name>"
-        }
+        },
+      "overrides": { "backend": "...", "f16": true }
      ]
    }
 ```
 
-An optional, list of additional files can be specified to be downloaded. The `name` allows to override the model name.
+An optional list of additional files to download can be specified within `files`. The `name` field allows overriding the model name. Finally, it is possible to override the model config file with `overrides`.
 
 Returns an `uuid` and an `url` to follow up the state of the process:
 
 ```json
-{ "uid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"}
+{ "uuid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"}
 ```
 
 To see a collection example of curated models definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery).
diff --git a/api/api.go b/api/api.go
index ec7c9815..b81a89f5 100644
--- a/api/api.go
+++ b/api/api.go
@@ -74,7 +74,7 @@ func App(c context.Context, configFile string, loader *model.ModelLoader, upload
 	applier := newGalleryApplier(loader.ModelPath)
 	applier.start(c, cm)
 	app.Post("/models/apply", applyModelGallery(loader.ModelPath, cm, applier.C))
-	app.Get("/models/jobs/:uid", getOpStatus(applier))
+	app.Get("/models/jobs/:uuid", getOpStatus(applier))
 
 	// openAI compatible API endpoint
 
diff --git a/api/api_test.go b/api/api_test.go
index 1a5d7d40..f061527f 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -1,7 +1,12 @@
 package api_test
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -11,11 +16,85 @@ import (
 	"github.com/gofiber/fiber/v2"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	"gopkg.in/yaml.v3"
 
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
 )
 
+type modelApplyRequest struct {
+	URL       string            `json:"url"`
+	Name      string            `json:"name"`
+	Overrides map[string]string `json:"overrides"`
+}
+
+func getModelStatus(url string) (response map[string]interface{}) {
+	// Create the HTTP request
+	resp, err := http.Get(url)
+	if err != nil {
+		fmt.Println("Error creating request:", err)
+		return
+	}
+	defer resp.Body.Close()
+
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		fmt.Println("Error reading response body:", err)
+		return
+	}
+
+	// Unmarshal the response into a map[string]interface{}
+	err = json.Unmarshal(body, &response)
+	if err != nil {
+		fmt.Println("Error unmarshaling JSON response:", err)
+		return
+	}
+	return
+}
+func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
+
+	//url := "http://localhost:AI/models/apply"
+
+	// Create the request payload
+
+	payload, err := json.Marshal(request)
+	if err != nil {
+		fmt.Println("Error marshaling JSON:", err)
+		return
+	}
+
+	// Create the HTTP request
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
+	if err != nil {
+		fmt.Println("Error creating request:", err)
+		return
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	// Make the request
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		fmt.Println("Error making request:", err)
+		return
+	}
+	defer resp.Body.Close()
+
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		fmt.Println("Error reading response body:", err)
+		return
+	}
+
+	// Unmarshal the response into a map[string]interface{}
+	err = json.Unmarshal(body, &response)
+	if err != nil {
+		fmt.Println("Error unmarshaling JSON response:", err)
+		return
+	}
+	return
+}
+
 var _ = Describe("API test", func() {
 
 	var app *fiber.App
@@ -24,6 +103,96 @@ var _ = Describe("API test", func() {
 	var client2 *openaigo.Client
 	var c context.Context
 	var cancel context.CancelFunc
+	var tmpdir string
+
+	Context("API with ephemeral models", func() {
+		BeforeEach(func() {
+			var err error
+			tmpdir, err = os.MkdirTemp("", "")
+			Expect(err).ToNot(HaveOccurred())
+
+			modelLoader = model.NewModelLoader(tmpdir)
+			c, cancel = context.WithCancel(context.Background())
+
+			app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "")
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+
+		AfterEach(func() {
+			cancel()
+			app.Shutdown()
+			os.RemoveAll(tmpdir)
+		})
+
+		Context("Applying models", func() {
+			It("overrides models", func() {
+				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
+					Name: "bert",
+					Overrides: map[string]string{
+						"backend": "llama",
+					},
+				})
+
+				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+				uuid := response["uuid"].(string)
+
+				Eventually(func() bool {
+					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+					fmt.Println(response)
+					return response["processed"].(bool)
+				}, "360s").Should(Equal(true))
+
+				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
+				Expect(err).ToNot(HaveOccurred())
+
+				content := map[string]interface{}{}
+				err = yaml.Unmarshal(dat, &content)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(content["backend"]).To(Equal("llama"))
+			})
+			It("apply models without overrides", func() {
+				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+					URL:       "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
+					Name:      "bert",
+					Overrides: map[string]string{},
+				})
+
+				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+				uuid := response["uuid"].(string)
+
+				Eventually(func() bool {
+					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+					fmt.Println(response)
+					return response["processed"].(bool)
+				}, "360s").Should(Equal(true))
+
+				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
+				Expect(err).ToNot(HaveOccurred())
+
+				content := map[string]interface{}{}
+				err = yaml.Unmarshal(dat, &content)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(content["backend"]).To(Equal("bert-embeddings"))
+			})
+		})
+	})
+
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
diff --git a/api/gallery.go b/api/gallery.go
index fac48622..591b1b7a 100644
--- a/api/gallery.go
+++ b/api/gallery.go
@@ -97,7 +97,7 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 
 				config.Files = append(config.Files, op.req.AdditionalFiles...)
 
-				if err := gallery.Apply(g.modelPath, op.req.Name, &config); err != nil {
+				if err := gallery.Apply(g.modelPath, op.req.Name, &config, op.req.Overrides); err != nil {
 					updateError(err)
 					continue
 				}
@@ -117,9 +117,10 @@ func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 // endpoints
 
 type ApplyGalleryModelRequest struct {
-	URL             string         `json:"url"`
-	Name            string         `json:"name"`
-	AdditionalFiles []gallery.File `json:"files"`
+	URL             string                 `json:"url"`
+	Name            string                 `json:"name"`
+	Overrides       map[string]interface{} `json:"overrides"`
+	AdditionalFiles []gallery.File         `json:"files"`
 }
 
 const (
@@ -162,7 +163,7 @@ func (request ApplyGalleryModelRequest) DecodeURL() (string, error) {
 func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 
-		status := g.getstatus(c.Params("uid"))
+		status := g.getstatus(c.Params("uuid"))
 		if status == nil {
 			return fmt.Errorf("could not find any status for ID")
 		}
@@ -188,7 +189,7 @@ func applyModelGallery(modelPath string, cm *ConfigMerger, g chan galleryOp) fun
 			id:  uuid.String(),
 		}
 		return c.JSON(struct {
-			ID        string `json:"uid"`
+			ID        string `json:"uuid"`
 			StatusURL string `json:"status"`
 		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
diff --git a/examples/langchain-chroma/README.md b/examples/langchain-chroma/README.md
index 17207a02..9fd9e312 100644
--- a/examples/langchain-chroma/README.md
+++ b/examples/langchain-chroma/README.md
@@ -36,6 +36,8 @@ pip install -r requirements.txt
 
 In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
 
+Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is provided, so an arbitrary string can be used.
+
 ```bash
 export OPENAI_API_BASE=http://localhost:8080/v1
 export OPENAI_API_KEY=sk-
diff --git a/examples/langchain-python/README.md b/examples/langchain-python/README.md
index a98c48f7..2472aab1 100644
--- a/examples/langchain-python/README.md
+++ b/examples/langchain-python/README.md
@@ -26,6 +26,7 @@ pip install langchain
 pip install openai
 
 export OPENAI_API_BASE=http://localhost:8080
+# Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is provided, so an arbitrary string can be used.
 export OPENAI_API_KEY=sk-
 
 python test.py
diff --git a/examples/query_data/README.md b/examples/query_data/README.md
index f7a4e1fe..c4e384cd 100644
--- a/examples/query_data/README.md
+++ b/examples/query_data/README.md
@@ -35,6 +35,8 @@ docker-compose up -d --build
 
 In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
 
+Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is provided, so an arbitrary string can be used.
+
 ```bash
 export OPENAI_API_BASE=http://localhost:8080/v1
 export OPENAI_API_KEY=sk-
diff --git a/go.mod b/go.mod
index e26e067a..1682e507 100644
--- a/go.mod
+++ b/go.mod
@@ -9,16 +9,15 @@ require (
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
-	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1
 	github.com/gofiber/fiber/v2 v2.46.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
+	github.com/imdario/mergo v0.3.15
 	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
 	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd
 	github.com/onsi/ginkgo/v2 v2.9.5
 	github.com/onsi/gomega v1.27.7
-	github.com/otiai10/copy v1.11.0
 	github.com/otiai10/openaigo v1.1.0
 	github.com/rs/zerolog v1.29.1
 	github.com/sashabaranov/go-openai v1.9.4
@@ -52,6 +51,7 @@ require (
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.18 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/otiai10/mint v1.5.1 // indirect
 	github.com/philhofer/fwd v1.1.2 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
diff --git a/go.sum b/go.sum
index f70fe7dc..c1ff1ac1 100644
--- a/go.sum
+++ b/go.sum
@@ -16,16 +16,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -46,30 +38,12 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
 github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
 github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVeDF1dOuAP/y6xWVC+BRtf9tJOuEza6Asbg=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
 github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8fYDXQahmCMTCACahm1tbiqHLPUHALkFLyBfo=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84 h1:f5iYF75bAr73Tl8AdtFD5Urs/2bsHKPh52K++jLbsfk=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84/go.mod h1:jxyQ26t1aKC5Gn782w9WWh5n1133PxCOfkuc01xM4RQ=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf h1:D9CLQwr1eqSnV0DM7YGOKhSfNajj2qOA7XAD6+p1/HI=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230518171914-33f8c2db53bf/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52 h1:VU64ntI6BuWRMFMzx8OTH26k5BJqY8vgW0Igyn9MYKc=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230519203945-3ee537e8cb52/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
@@ -91,6 +65,8 @@ github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv
 github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM=
+github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
 github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
@@ -113,31 +89,18 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9
 github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24 h1:XfRD/bZom6u4zji7aB0urIVOsPe43KlkzSRrVhlzaOM=
-github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
 github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc h1:OPavP/SUsVWVYPhSUZKZeX8yDSQzf4G+BmUmwzrLTyI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878 h1:3MFUW2a1Aqm2nMF5f+PNGq55cbxIzkRHQX/o7JVysAo=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518171731-546600fb6878/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042 h1:bMU75tTgyw6mYDa5NbOHlZ1KYqQgz7heQFfTwuvCGww=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230518183047-94f401889042/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM=
 github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
-github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
 github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
 github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
-github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
-github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
 github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=
 github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4=
-github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
-github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
 github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
+github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
 github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
 github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
 github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
@@ -153,8 +116,6 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
 github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
-github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
 github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
@@ -198,8 +159,6 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
 golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
-golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
-golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -237,8 +196,6 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
 golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
-golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
-golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
 golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
 golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go
index 8c61380c..f4f86ae7 100644
--- a/pkg/gallery/models.go
+++ b/pkg/gallery/models.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/imdario/mergo"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
@@ -92,13 +93,17 @@ func verifyPath(path, basePath string) error {
 	return inTrustedRoot(c, basePath)
 }
 
-func Apply(basePath, nameOverride string, config *Config) error {
+func Apply(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}) error {
 	// Create base path if it doesn't exist
 	err := os.MkdirAll(basePath, 0755)
 	if err != nil {
 		return fmt.Errorf("failed to create base path: %v", err)
 	}
 
+	if len(configOverrides) > 0 {
+		log.Debug().Msgf("Config overrides %+v", configOverrides)
+	}
+
 	// Download files and verify their SHA
 	for _, file := range config.Files {
 		log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
@@ -231,6 +236,10 @@ func Apply(basePath, nameOverride string, config *Config) error {
 
 	configMap["name"] = name
 
+	if err := mergo.Merge(&configMap, configOverrides, mergo.WithOverride); err != nil {
+		return err
+	}
+
 	// Write updated config file
 	updatedConfigYAML, err := yaml.Marshal(configMap)
 	if err != nil {
diff --git a/pkg/gallery/models_test.go b/pkg/gallery/models_test.go
index 980b3a96..f0e580e9 100644
--- a/pkg/gallery/models_test.go
+++ b/pkg/gallery/models_test.go
@@ -7,6 +7,7 @@ import (
 	. "github.com/go-skynet/LocalAI/pkg/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	"gopkg.in/yaml.v3"
 )
 
 var _ = Describe("Model test", func() {
@@ -18,14 +19,25 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())
 
-			err = Apply(tempdir, "", c)
+			err = Apply(tempdir, "", c, map[string]interface{}{})
 			Expect(err).ToNot(HaveOccurred())
 
 			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
 				_, err = os.Stat(filepath.Join(tempdir, f))
 				Expect(err).ToNot(HaveOccurred())
 			}
+
+			content := map[string]interface{}{}
+
+			dat, err := os.ReadFile(filepath.Join(tempdir, "cerebras.yaml"))
+			Expect(err).ToNot(HaveOccurred())
+
+			err = yaml.Unmarshal(dat, content)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(content["context_size"]).To(Equal(1024))
 		})
+
 		It("renames model correctly", func() {
 			tempdir, err := os.MkdirTemp("", "test")
 			Expect(err).ToNot(HaveOccurred())
@@ -33,7 +45,7 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())
 
-			err = Apply(tempdir, "foo", c)
+			err = Apply(tempdir, "foo", c, map[string]interface{}{})
 			Expect(err).ToNot(HaveOccurred())
 
 			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
@@ -41,6 +53,33 @@ var _ = Describe("Model test", func() {
 				Expect(err).ToNot(HaveOccurred())
 			}
 		})
+
+		It("overrides parameters", func() {
+			tempdir, err := os.MkdirTemp("", "test")
+			Expect(err).ToNot(HaveOccurred())
+			defer os.RemoveAll(tempdir)
+			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
+			Expect(err).ToNot(HaveOccurred())
+
+			err = Apply(tempdir, "foo", c, map[string]interface{}{"backend": "foo"})
+			Expect(err).ToNot(HaveOccurred())
+
+			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
+				_, err = os.Stat(filepath.Join(tempdir, f))
+				Expect(err).ToNot(HaveOccurred())
+			}
+
+			content := map[string]interface{}{}
+
+			dat, err := os.ReadFile(filepath.Join(tempdir, "foo.yaml"))
+			Expect(err).ToNot(HaveOccurred())
+
+			err = yaml.Unmarshal(dat, content)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(content["backend"]).To(Equal("foo"))
+		})
+
 		It("catches path traversals", func() {
 			tempdir, err := os.MkdirTemp("", "test")
 			Expect(err).ToNot(HaveOccurred())
@@ -48,7 +87,7 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())
 
-			err = Apply(tempdir, "../../../foo", c)
+			err = Apply(tempdir, "../../../foo", c, map[string]interface{}{})
 			Expect(err).To(HaveOccurred())
 		})
 	})

From 93cc8569c36c9ecc4d000d8f597948c69d5ff8ee Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sat, 20 May 2023 19:50:01 +0200
Subject: [PATCH 25/32] :arrow_up: Update ggerganov/whisper.cpp (#326)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d4103630..8023b0f6 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
-WHISPER_CPP_VERSION?=95b02d76b04d18e4ce37ed8353a1f0797f1717ea
+WHISPER_CPP_VERSION?=bc89f285d8b025867cae87421824f2dea1c8899f
 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
 BUILD_TYPE?=

From c7056756d5221d8e023ebd517fbffd0266ba0fd5 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 19:50:28 +0200
Subject: [PATCH 26/32] fix(deps): update
 github.com/ggerganov/whisper.cpp/bindings/go digest to 429b978 (#329)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 1682e507..3a8038a8 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,7 @@ go 1.19
 
 require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56
-	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d
+	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
diff --git a/go.sum b/go.sum
index c1ff1ac1..9faa3e36 100644
--- a/go.sum
+++ b/go.sum
@@ -20,6 +20,8 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstK
 github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080 h1:W3itqKpRX9FhheKiAxdmuOBy/mjDfMf2G1vcuFIYqZc=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
 github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
 github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=

From 864aaf8c4defa7c879c075c930bc5809624e680b Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sat, 20 May 2023 20:42:29 +0200
Subject: [PATCH 27/32] :arrow_up: Update go-skynet/go-llama.cpp (#327)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8023b0f6..37a0d8a9 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=a298043ef5f1e7039ae4f193027eab4bb88312b6
+GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827

From 9ed82199c53e33a9dc3b61fe0cd189fd81c956a1 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 20:42:59 +0200
Subject: [PATCH 28/32] fix(deps): update github.com/go-skynet/go-llama.cpp
 digest to ccf23ad (#330)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 3a8038a8..e0a699ce 100644
--- a/go.mod
+++ b/go.mod
@@ -9,7 +9,7 @@ require (
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278
 	github.com/gofiber/fiber/v2 v2.46.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
diff --git a/go.sum b/go.sum
index 9faa3e36..8ef9cacf 100644
--- a/go.sum
+++ b/go.sum
@@ -48,6 +48,8 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=

From b8f52d67e164c90cdb216d9cc19ab2f0fc008aa2 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Sat, 20 May 2023 22:21:07 +0200
Subject: [PATCH 29/32] fix(deps): update
 github.com/ggerganov/whisper.cpp/bindings/go digest to 041be06 (#331)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index e0a699ce..adb9c45a 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,7 @@ go 1.19
 
 require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56
-	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080
+	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
diff --git a/go.sum b/go.sum
index 8ef9cacf..20a4b22b 100644
--- a/go.sum
+++ b/go.sum
@@ -22,6 +22,8 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080 h1:W3itqKpRX9FhheKiAxdmuOBy/mjDfMf2G1vcuFIYqZc=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
 github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
 github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=

From 482a83886e5cbb738e874ada43afd05f67d761a8 Mon Sep 17 00:00:00 2001
From: "ci-robbot [bot]" <105103991+ci-robbot@users.noreply.github.com>
Date: Sun, 21 May 2023 00:40:17 +0200
Subject: [PATCH 30/32] :arrow_up: Update ggerganov/whisper.cpp (#332)

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 37a0d8a9..393e7ce3 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
 GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
-WHISPER_CPP_VERSION?=bc89f285d8b025867cae87421824f2dea1c8899f
+WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd
 BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
 BUILD_TYPE?=

From 91bdad1d12fb8e7ae4181803c188f3054634b13e Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Sun, 21 May 2023 00:41:11 +0200
Subject: [PATCH 31/32] docs: fix typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e71a56de..6b56e4cd 100644
--- a/README.md
+++ b/README.md
@@ -868,7 +868,7 @@ curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '
         {
             "uri": "<additional_file>",
             "sha256": "<additional_file_hash>",
-            "name": "<additional_file_name>"
+            "filename": "<additional_file_name>"
         },
       "overrides": { "backend": "...", "f16": true }
      ]

From 3c07e11e736bade6607b85b7ddb6c5b507105ec2 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Sun, 21 May 2023 00:45:24 +0200
Subject: [PATCH 32/32] docs: update README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 6b56e4cd..43a7c99d 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https
 
 ## News
 
+- 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint. The `llama.cpp` backend was updated and now includes https://github.com/ggerganov/llama.cpp/pull/1508, which breaks compatibility with older models. `gpt4all` is still compatible with the old format.
 - 19-05-2023: __v1.13.0__ released! 🔥🔥 updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api).
 - 17-05-2023:  __v1.12.0__ released! 🔥🔥 Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272).
 - 16-05-2023: 🔥🔥🔥 Experimental support for CUDA (https://github.com/go-skynet/LocalAI/pull/258) in the `llama.cpp` backend and Stable diffusion CPU image generation (https://github.com/go-skynet/LocalAI/pull/272) in `master`.