Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 10:35:01 +00:00)
cleanup: drop bloomz and ggllm as now supported by llama.cpp (#1217)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Parent: f227e918f9
Commit: c62504ac92

6 changed files with 2 additions and 294 deletions
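In practice this means GGML-format Falcon and Bloomz models, which previously required the dedicated go-ggllm.cpp and bloomz.cpp bindings, are now expected to be handled by the llama.cpp-based backends, so the clone/build plumbing, the two gRPC server binaries, and their wrapper packages can all be deleted.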
Makefile | 40 (+2 −38)
@@ -30,15 +30,9 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 # go-piper version
 PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
 
-# go-bloomz version
-BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
-
 # stablediffusion version
 STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
 
-# Go-ggllm
-GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
-
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
 export CMAKE_ARGS?=
@@ -129,7 +123,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 
 # If empty, then we build all
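A note on the variable changed above: ALL_GRPC_BACKENDS is only the default set. Since GRPC_BACKENDS is assigned with ?=, a caller can still restrict the build to a subset, for example (illustrative invocation):

	make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build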
@@ -146,14 +140,6 @@ gpt4all:
 	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
 	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
 
-## go-ggllm
-go-ggllm:
-	git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
-	cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
-
-go-ggllm/libggllm.a: go-ggllm
-	$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
-
 ## go-piper
 go-piper:
 	git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
@@ -180,14 +166,6 @@ go-rwkv:
 go-rwkv/librwkv.a: go-rwkv
 	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
 
-## bloomz
-bloomz:
-	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
-	cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
-
-bloomz/libbloomz.a: bloomz
-	cd bloomz && make libbloomz.a
-
 go-bert/libgobert.a: go-bert
 	$(MAKE) -C go-bert libgobert.a
@@ -241,7 +219,7 @@ go-llama-stable/libbinding.a: go-llama-stable
 go-piper/libpiper_binding.a: go-piper
 	$(MAKE) -C go-piper libpiper_binding.a example/main
 
-get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
+get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
 	touch $@
 
 replace:
@@ -250,10 +228,8 @@ replace:
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
-	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
-	$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
 
 prepare-sources: get-sources replace
 	$(GOCMD) mod download
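For readers skimming the replace target: each go mod edit -replace line points a Go import at the locally cloned binding tree, so removing a clone from get-sources and removing its replace line have to happen together, as this commit does for bloomz.cpp and go-ggllm.cpp. After the target runs, go.mod carries entries of roughly this shape (the absolute prefix depends on where the tree is checked out; /build is assumed here):

	replace github.com/donomii/go-rwkv.cpp => /build/go-rwkv
	replace github.com/go-skynet/bloomz.cpp => /build/bloomz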
@@ -269,9 +245,7 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C whisper.cpp clean
 	$(MAKE) -C go-stable-diffusion clean
 	$(MAKE) -C go-bert clean
-	$(MAKE) -C bloomz clean
 	$(MAKE) -C go-piper clean
-	$(MAKE) -C go-ggllm clean
 	$(MAKE) build
 
 prepare: prepare-sources $(OPTIONAL_TARGETS)
@@ -289,10 +263,8 @@ clean: ## Remove build related file
 	rm -rf ./backend-assets
 	rm -rf ./go-rwkv
 	rm -rf ./go-bert
-	rm -rf ./bloomz
 	rm -rf ./whisper.cpp
 	rm -rf ./go-piper
-	rm -rf ./go-ggllm
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
 	$(MAKE) -C backend/cpp/llama clean
@@ -418,10 +390,6 @@ protogen-python:
 backend-assets/grpc:
 	mkdir -p backend-assets/grpc
 
-backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
-
 backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
@@ -486,10 +454,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
 
-backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
-
 backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
cmd/grpc/bloomz/main.go (deleted)

@@ -1,23 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"
-
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil {
-		panic(err)
-	}
-}
cmd/grpc/falcon/main.go (deleted)

@@ -1,25 +0,0 @@
-package main
-
-// GRPC Falcon server
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon"
-
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil {
-		panic(err)
-	}
-}
pkg/backend/llm/bloomz/bloomz.go (deleted)

@@ -1,59 +0,0 @@
-package bloomz
-
-// This is a wrapper to satisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	"github.com/go-skynet/bloomz.cpp"
-)
-
-type LLM struct {
-	base.SingleThread
-
-	bloomz *bloomz.Bloomz
-}
-
-func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	model, err := bloomz.New(opts.ModelFile)
-	llm.bloomz = model
-	return err
-}
-
-func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
-	predictOptions := []bloomz.PredictOption{
-		bloomz.SetTemperature(float64(opts.Temperature)),
-		bloomz.SetTopP(float64(opts.TopP)),
-		bloomz.SetTopK(int(opts.TopK)),
-		bloomz.SetTokens(int(opts.Tokens)),
-		bloomz.SetThreads(int(opts.Threads)),
-	}
-
-	if opts.Seed != 0 {
-		predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed)))
-	}
-
-	return predictOptions
-}
-
-func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-
-	return nil
-}
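Worth noting about the wrapper above: its PredictStream never streamed token by token. It runs a blocking Predict in a goroutine, sends the whole completion as a single channel message, then closes the channel, so callers written against the streaming contract still work. A minimal consumer sketch under that contract (the consume function is illustrative, not LocalAI code; LLM and pb are the types from the deleted file):

	// consume drains a backend's PredictStream channel. With the bloomz
	// fallback the loop body runs exactly once, receiving the full
	// completion; a real streaming backend delivers many small tokens.
	func consume(llm *LLM, opts *pb.PredictOptions) {
		results := make(chan string)
		if err := llm.PredictStream(opts, results); err != nil {
			fmt.Println("stream error:", err)
			return
		}
		for chunk := range results {
			fmt.Print(chunk)
		}
	}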
pkg/backend/llm/falcon/falcon.go (deleted)

@@ -1,145 +0,0 @@
-package falcon
-
-// This is a wrapper to satisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	ggllm "github.com/mudler/go-ggllm.cpp"
-)
-
-type LLM struct {
-	base.SingleThread
-
-	falcon *ggllm.Falcon
-}
-
-func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	ggllmOpts := []ggllm.ModelOption{}
-	if opts.ContextSize != 0 {
-		ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))
-	}
-	// F16 doesn't seem to produce good output at all!
-	//if c.F16 {
-	//	llamaOpts = append(llamaOpts, llama.EnableF16Memory)
-	//}
-
-	if opts.NGPULayers != 0 {
-		ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers)))
-	}
-
-	ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap))
-	ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU))
-	ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit))
-	if opts.NBatch != 0 {
-		ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch)))
-	} else {
-		ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
-	}
-
-	model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
-	llm.falcon = model
-	return err
-}
-
-func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
-	predictOptions := []ggllm.PredictOption{
-		ggllm.SetTemperature(float64(opts.Temperature)),
-		ggllm.SetTopP(float64(opts.TopP)),
-		ggllm.SetTopK(int(opts.TopK)),
-		ggllm.SetTokens(int(opts.Tokens)),
-		ggllm.SetThreads(int(opts.Threads)),
-	}
-
-	if opts.PromptCacheAll {
-		predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll)
-	}
-
-	if opts.PromptCacheRO {
-		predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO)
-	}
-
-	// Expected absolute path
-	if opts.PromptCachePath != "" {
-		predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath))
-	}
-
-	if opts.Mirostat != 0 {
-		predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat)))
-	}
-
-	if opts.MirostatETA != 0 {
-		predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA)))
-	}
-
-	if opts.MirostatTAU != 0 {
-		predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU)))
-	}
-
-	if opts.Debug {
-		predictOptions = append(predictOptions, ggllm.Debug)
-	}
-
-	predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...))
-
-	if opts.PresencePenalty != 0 {
-		predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty)))
-	}
-
-	if opts.NKeep != 0 {
-		predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep)))
-	}
-
-	if opts.Batch != 0 {
-		predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch)))
-	}
-
-	if opts.IgnoreEOS {
-		predictOptions = append(predictOptions, ggllm.IgnoreEOS)
-	}
-
-	if opts.Seed != 0 {
-		predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed)))
-	}
-
-	//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
-
-	predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty)))
-	predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock))
-	predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap))
-	predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU))
-	predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit))
-	predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ)))
-	predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP)))
-	return predictOptions
-}
-
-func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-
-	predictOptions := buildPredictOptions(opts)
-
-	predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
-		if token == "<|endoftext|>" {
-			return true
-		}
-		results <- token
-		return true
-	}))
-
-	go func() {
-		_, err := llm.falcon.Predict(opts.Prompt, predictOptions...)
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		close(results)
-	}()
-
-	return nil
-}
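One detail of the deleted falcon wrapper deserves a comment: the token callback returns true even for <|endoftext|>, so hitting the stop token only suppresses forwarding; generation keeps running until the binding stops on its own. Assuming go-ggllm follows the usual go-llama.cpp convention that a false return aborts generation (an assumption, not confirmed by this diff), a callback that actually halts would look like this sketch:

	predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
		if token == "<|endoftext|>" {
			return false // assumed semantics: false tells the binding to stop generating
		}
		results <- token
		return true
	}))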
pkg/model/initializers.go

@@ -18,7 +18,6 @@ const (
 	LlamaBackend       = "llama"
 	LlamaStableBackend = "llama-stable"
 	LLamaCPP           = "llama-cpp"
-	BloomzBackend      = "bloomz"
 	StarcoderBackend   = "starcoder"
 	GPTJBackend        = "gptj"
 	DollyBackend       = "dolly"
@@ -30,7 +29,6 @@ const (
 	Gpt4AllMptBackend = "gpt4all-mpt"
 	Gpt4AllJBackend   = "gpt4all-j"
 	Gpt4All           = "gpt4all"
-	FalconBackend     = "falcon"
 	FalconGGMLBackend = "falcon-ggml"
 
 	BertEmbeddingsBackend = "bert-embeddings"
@@ -46,7 +44,6 @@ var AutoLoadBackends []string = []string{
 	LlamaStableBackend,
 	LlamaBackend,
 	Gpt4All,
-	FalconBackend,
 	GPTNeoXBackend,
 	BertEmbeddingsBackend,
 	FalconGGMLBackend,
@@ -56,7 +53,6 @@ var AutoLoadBackends []string = []string{
 	MPTBackend,
 	ReplitBackend,
 	StarcoderBackend,
-	BloomzBackend,
 	RwkvBackend,
 	WhisperBackend,
 	StableDiffusionBackend,
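Context for the two slice removals: AutoLoadBackends is the ordered list LocalAI walks when a model arrives with no explicit backend, and the first backend that loads the file wins. Dropping FalconBackend and BloomzBackend means such models now fall through to the llama backends earlier in the list. A sketch of that greedy fallback, with an assumed loadWithBackend helper standing in for the real loader:

	// greedyLoad tries each registered backend in order and returns the
	// first successful load; the names here are illustrative, not LocalAI's API.
	func greedyLoad(modelFile string) (interface{}, error) {
		for _, backend := range AutoLoadBackends {
			if model, err := loadWithBackend(backend, modelFile); err == nil {
				return model, nil
			}
		}
		return nil, fmt.Errorf("no backend could load %q", modelFile)
	}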