From 8814b31805b8b77a467fcaf4ce25aa37f36f59dc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 7 Aug 2024 23:35:55 +0200
Subject: [PATCH] chore: drop gpt4all.cpp (#3106)

chore: drop gpt4all

gpt4all is already supported in llama.cpp - the backend was kept to
maintain compatibility with old gpt4all models (prior to the gguf
format). Now is a good time to clean up and remove it to slim down
the compilation process.

Signed-off-by: Ettore Di Giacinto
---
 Makefile                           | 42 +-------
 backend/go/llm/gpt4all/gpt4all.go  | 62 ------------
 backend/go/llm/gpt4all/main.go     | 21 ----------
 core/cli/worker/worker_llamacpp.go |  2 +-
 core/cli/worker/worker_p2p.go      |  2 +-
 core/http/app_test.go              | 40 -------
 core/http/routes/ui.go             |  2 +-
 core/startup/startup.go            |  2 +-
 pkg/model/initializers.go          | 11 +-----
 9 files changed, 7 insertions(+), 177 deletions(-)
 delete mode 100644 backend/go/llm/gpt4all/gpt4all.go
 delete mode 100644 backend/go/llm/gpt4all/main.go

diff --git a/Makefile b/Makefile
index 476caac6..bcbdbe83 100644
--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,6 @@ GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
 
 CPPLLAMA_VERSION?=1e6f6554aa11fa10160a5fda689e736c3c34169f
-# gpt4all version
-GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
-GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
-
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
@@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
 ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
-ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
@@ -253,18 +248,6 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
 
-## GPT4ALL
-sources/gpt4all:
-	mkdir -p sources/gpt4all
-	cd sources/gpt4all && \
-	git init && \
-	git remote add origin $(GPT4ALL_REPO) && \
-	git fetch origin && \
-	git checkout $(GPT4ALL_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
-	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## RWKV
 sources/go-rwkv.cpp:
@@ -318,7 +301,7 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -328,7 +311,6 @@ replace:
 	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
-	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 
 dropreplace:
@@ -339,7 +321,6 @@ dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
-	$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
 
 prepare-sources: get-sources replace
@@ -349,7 +330,6 @@ prepare-sources: get-sources replace
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C sources/go-llama.cpp clean
-	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C sources/go-rwkv.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
@@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs
 	export GO_TAGS="tts stablediffusion debug"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
-	$(MAKE) test-gpt4all
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
 	$(MAKE) test-llama
 	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
@@ -500,10 +479,6 @@ teardown-e2e:
 	rm -rf $(TEST_DIR) || true
 	docker stop $$(docker ps -q --filter ancestor=localai-tests)
 
-test-gpt4all: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
-
 test-llama: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
@@ -730,12 +705,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
 	mkdir -p backend-assets/espeak-ng-data
 	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
 
-backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
-	mkdir -p backend-assets/gpt4all
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
-
 backend-assets/grpc: protogen-go replace
 	mkdir -p backend-assets/grpc
 
@@ -746,13 +715,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/bert-embeddings
endif
 
-backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/gpt4all
-endif
-
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
 ifneq ($(UPX),)
diff --git a/backend/go/llm/gpt4all/gpt4all.go b/backend/go/llm/gpt4all/gpt4all.go
deleted file mode 100644
index 9caab48c..00000000
--- a/backend/go/llm/gpt4all/gpt4all.go
+++ /dev/null
@@ -1,62 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
-)
-
-type LLM struct {
-	base.SingleThread
-
-	gpt4all *gpt4all.Model
-}
-
-func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	model, err := gpt4all.New(opts.ModelFile,
-		gpt4all.SetThreads(int(opts.Threads)),
-		gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
-	llm.gpt4all = model
-	return err
-}
-
-func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
-	predictOptions := []gpt4all.PredictOption{
-		gpt4all.SetTemperature(float64(opts.Temperature)),
-		gpt4all.SetTopP(float64(opts.TopP)),
-		gpt4all.SetTopK(int(opts.TopK)),
-		gpt4all.SetTokens(int(opts.Tokens)),
-	}
-
-	if opts.Batch != 0 {
-		predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
-	}
-	return predictOptions
-}
-
-func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	predictOptions := buildPredictOptions(opts)
-
-	go func() {
-		llm.gpt4all.SetTokenCallback(func(token string) bool {
-			results <- token
-			return true
-		})
-		_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		llm.gpt4all.SetTokenCallback(nil)
-		close(results)
-	}()
-
-	return nil
-}
diff --git a/backend/go/llm/gpt4all/main.go b/backend/go/llm/gpt4all/main.go
deleted file mode 100644
index acf44087..00000000
--- a/backend/go/llm/gpt4all/main.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
-		panic(err)
-	}
-}
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 5598a485..2baf51ec 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -21,7 +21,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
 	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
 	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 	}
 
 	if len(os.Args) < 4 {
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index ddb3518c..93a365cb 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -33,7 +33,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
 	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
 	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 	}
 
 	// Check if the token is set
diff --git a/core/http/app_test.go b/core/http/app_test.go
index b21ad25a..a837e20c 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -563,32 +563,6 @@ var _ = Describe("API test", func() {
 			Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
 			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 		})
-
-		It("runs gpt4all", Label("gpt4all"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				URL:  "github:go-skynet/model-gallery/gpt4all-j.yaml",
-				Name: "gpt4all-j",
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				return response["processed"].(bool)
-			}, "960s", "10s").Should(Equal(true))
-
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
-		})
-
 	})
 })
 
@@ -792,20 +766,6 @@ var _ = Describe("API test", func() {
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 		})
 
-		It("can generate completions from model configs", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-		It("can generate chat completions from model configs", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 			Expect(err).To(HaveOccurred())
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 4f8afd3c..2996e9dc 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -267,7 +267,7 @@ func RegisterUIRoutes(app *fiber.App,
 			return c.SendString(elements.ProgressBar("100"))
 		}
 		if status.Error != nil {
-			// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
+			// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user
 			processingModels.DeleteUUID(jobUID)
 			return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
 		}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 55f930a4..3565d196 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -106,7 +106,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
 		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
 		if err != nil {
-			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 		}
 	}
 
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 88a08f28..11980f03 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -45,11 +45,6 @@ const (
 
 	LLamaCPPGRPC = "llama-cpp-grpc"
 
-	Gpt4AllLlamaBackend = "gpt4all-llama"
-	Gpt4AllMptBackend   = "gpt4all-mpt"
-	Gpt4AllJBackend     = "gpt4all-j"
-	Gpt4All             = "gpt4all"
-
 	BertEmbeddingsBackend = "bert-embeddings"
 	RwkvBackend           = "rwkv"
 	WhisperBackend        = "whisper"
@@ -144,11 +139,10 @@ ENTRY:
 
 	// sets a priority list - first has more priority
 	priorityList := []string{
-
 		// First llama.cpp(variants) and llama-ggml to follow.
 		// We keep the fallback to prevent that if the llama.cpp variants
 		// that depends on shared libs if breaks have still a safety net.
-		LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback,
+		LLamaCPP, LlamaGGML, LLamaCPPFallback,
 	}
 
 	toTheEnd := []string{
@@ -434,9 +428,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
 	var backendToConsume string
 
 	switch backend {
-	case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
-		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
-		backendToConsume = Gpt4All
 	case PiperBackend:
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
 		backendToConsume = PiperBackend
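
Note for backend authors: the two files deleted above are a complete example of the thin gRPC wrapper that in-tree Go backends implement. Below is a minimal sketch of the same pattern, assuming only the LocalAI packages the removed code already imported (base.SingleThread supplies default implementations for the rest of the backend interface) and a hypothetical no-op "echo" engine standing in for the gpt4all bindings:

package main

import (
	"flag"

	grpc "github.com/mudler/LocalAI/pkg/grpc"
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

// Echo is an illustrative stand-in engine: it implements the same three
// methods the deleted gpt4all wrapper provided and inherits the rest of the
// gRPC backend interface from base.SingleThread.
type Echo struct {
	base.SingleThread

	modelFile string
}

// Load is where a real backend would initialize its engine from opts.ModelFile.
func (e *Echo) Load(opts *pb.ModelOptions) error {
	e.modelFile = opts.ModelFile
	return nil
}

// Predict returns the full completion at once; here it simply echoes the prompt.
func (e *Echo) Predict(opts *pb.PredictOptions) (string, error) {
	return opts.Prompt, nil
}

// PredictStream sends tokens over the results channel and closes it when done,
// the same contract the gpt4all wrapper fulfilled via its token callback.
func (e *Echo) PredictStream(opts *pb.PredictOptions, results chan string) error {
	go func() {
		results <- opts.Prompt
		close(results)
	}()
	return nil
}

var addr = flag.String("addr", "localhost:50051", "the address to listen on")

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Echo{}); err != nil {
		panic(err)
	}
}

Built into backend-assets/grpc/<name> (as the removed Makefile target did for gpt4all), such a binary can be picked up and launched by the model loader like the other gRPC backends.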