mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-28 22:44:59 +00:00)

Usage Features (#863)

This commit is contained in:
parent 2bacd0180d
commit 8cb1061c11

40 changed files with 1222 additions and 317 deletions
bert backend:

@@ -4,6 +4,7 @@ package bert
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
    bert "github.com/go-skynet/go-bert.cpp"
    "github.com/rs/zerolog/log"

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"

@@ -15,12 +16,21 @@ type Embeddings struct {
}

func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("bert backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := bert.New(opts.ModelFile)
    llm.bert = model
    return err
}

func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    if len(opts.EmbeddingTokens) > 0 {
        tokens := []int{}
        for _, t := range opts.EmbeddingTokens {
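Every backend in this commit gains the same guard: warn if Load is called while the backend is already initialized, then serialize all work through the embedded base.Base. A minimal sketch of what that shared embedding plausibly looks like, inferred only from the calls visible above (Lock, Unlock, State) rather than copied from pkg/grpc/base — the BUSY/READY transitions are assumptions:

    package base

    import (
        "sync"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    // Base is embedded by every backend; hypothetical reconstruction.
    type Base struct {
        mu    sync.Mutex
        State pb.StatusResponse_State // starts at StatusResponse_UNINITIALIZED
    }

    // Lock serializes requests and marks the backend busy (assumed transition).
    func (b *Base) Lock() {
        b.mu.Lock()
        b.State = pb.StatusResponse_BUSY
    }

    // Unlock marks the backend ready again (assumed transition).
    func (b *Base) Unlock() {
        b.State = pb.StatusResponse_READY
        b.mu.Unlock()
    }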
bloomz backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    "github.com/go-skynet/bloomz.cpp"
)

@@ -18,6 +19,12 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("bloomz backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := bloomz.New(opts.ModelFile)
    llm.bloomz = model
    return err

@@ -40,11 +47,16 @@ func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()

    go func() {
        res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -53,6 +65,7 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()

    return nil
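bloomz has no native token streaming, so PredictStream just runs Predict in a goroutine and emits the whole completion as a single message before closing the channel. A hedged sketch of a caller draining that channel — streamAll is an illustrative helper, not part of the commit:

    import (
        "strings"

        pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    )

    // streamAll collects everything a PredictStream implementation sends
    // until it closes the channel; for the fallback backends that is a
    // single message carrying the whole completion.
    func streamAll(stream func(*pb.PredictOptions, chan string) error,
        opts *pb.PredictOptions) (string, error) {
        results := make(chan string)
        if err := stream(opts, results); err != nil {
            return "", err
        }
        var sb strings.Builder
        for chunk := range results {
            sb.WriteString(chunk)
        }
        return sb.String(), nil
    }

Called as streamAll(llm.PredictStream, opts), this behaves identically for real streaming and fallback backends, which is the point of keeping the channel contract uniform.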
falcon backend (go-ggllm):

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    ggllm "github.com/mudler/go-ggllm.cpp"
)

@@ -18,6 +19,13 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("falcon backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()

    ggllmOpts := []ggllm.ModelOption{}
    if opts.ContextSize != 0 {
        ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))

@@ -118,10 +126,14 @@ func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()

    predictOptions := buildPredictOptions(opts)

    predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {

@@ -138,6 +150,7 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
            fmt.Println("err: ", err)
        }
        close(results)
        llm.Base.Unlock()
    }()

    return nil
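Unlike the fallback backends, ggllm streams real tokens: PredictStream registers a token callback before launching the generation goroutine, though the callback body is elided in the hunk above. A plausible reconstruction of that bridge, labeled as such:

    // Hypothetical completion of the elided callback body: forward each
    // token into the results channel; returning true keeps generation going.
    predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
        results <- token
        return true
    }))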
gpt4all backend:

@@ -8,6 +8,7 @@ import (
    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
    "github.com/rs/zerolog/log"
)

type LLM struct {

@@ -17,6 +18,13 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("gpt4all backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()

    model, err := gpt4all.New(opts.ModelFile,
        gpt4all.SetThreads(int(opts.Threads)),
        gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))

@@ -39,10 +47,15 @@ func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()

    predictOptions := buildPredictOptions(opts)

    go func() {

@@ -56,6 +69,7 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        llm.gpt4all.SetTokenCallback(nil)
        close(results)
        llm.Base.Unlock()
    }()

    return nil
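gpt4all attaches its token callback to the model object itself, which is why the goroutine resets it with SetTokenCallback(nil) once the stream ends: otherwise a later request could fire a stale callback writing to an already-closed channel. A hedged sketch of the registration that presumably precedes the goroutine (elided in the hunk above):

    // Hypothetical reconstruction of the elided registration.
    llm.gpt4all.SetTokenCallback(func(token string) bool {
        results <- token // stream each token as it is generated
        return true      // keep generating
    })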
langchain backend:

@@ -8,6 +8,7 @@ import (
    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/go-skynet/LocalAI/pkg/langchain"
    "github.com/rs/zerolog/log"
)

type LLM struct {

@@ -18,12 +19,21 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("langchain backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
    llm.model = opts.Model
    return nil
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    o := []langchain.PredictOption{
        langchain.SetModel(llm.model),
        langchain.SetMaxTokens(int(opts.Tokens)),

@@ -38,6 +48,7 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    o := []langchain.PredictOption{
        langchain.SetModel(llm.model),
        langchain.SetMaxTokens(int(opts.Tokens)),

@@ -52,6 +63,7 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res.Completion
        close(results)
        llm.Base.Unlock()
    }()

    return nil
llama backend:

@@ -8,6 +8,7 @@ import (
    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/go-skynet/go-llama.cpp"
    "github.com/rs/zerolog/log"
)

type LLM struct {

@@ -18,6 +19,13 @@ type LLM struct {

func (llm *LLM) Load(opts *pb.ModelOptions) error {

    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("llama backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()

    ropeFreqBase := float32(10000)
    ropeFreqScale := float32(1)

@@ -73,6 +81,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {

    model, err := llama.New(opts.ModelFile, llamaOpts...)
    llm.llama = model

    return err
}

@@ -167,10 +176,14 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()

    predictOptions := buildPredictOptions(opts)

    predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {

@@ -184,12 +197,16 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
            fmt.Println("err: ", err)
        }
        close(results)
        llm.Base.Unlock()
    }()

    return nil
}

func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    predictOptions := buildPredictOptions(opts)

    if len(opts.EmbeddingTokens) > 0 {

@@ -202,3 +219,18 @@ func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {

    return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}

func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    predictOptions := buildPredictOptions(opts)
    l, tokens, err := llm.llama.TokenizeString(opts.Prompt, predictOptions...)
    if err != nil {
        return pb.TokenizationResponse{}, err
    }
    return pb.TokenizationResponse{
        Length: l,
        Tokens: tokens,
    }, nil
}
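TokenizeString is the genuinely new surface in this file and the hook the usage accounting hangs off: it returns both the token count and the tokens for a prompt. A hedged usage sketch — countPromptTokens is illustrative, not part of the commit, and the Length type follows whatever the proto declares:

    // countPromptTokens reports how many tokens a prompt occupies, e.g.
    // to fill the prompt_tokens field of an OpenAI-style usage object.
    func countPromptTokens(llm *LLM, prompt string) (int32, error) {
        resp, err := llm.TokenizeString(&pb.PredictOptions{Prompt: prompt})
        if err != nil {
            return 0, err
        }
        return resp.Length, nil
    }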
rwkv backend:

@@ -9,6 +9,7 @@ import (
    "github.com/donomii/go-rwkv.cpp"
    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"
)

const tokenizerSuffix = ".tokenizer.json"

@@ -20,6 +21,12 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("rwkv backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    modelPath := filepath.Dir(opts.ModelFile)
    modelFile := filepath.Base(opts.ModelFile)
    model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))

@@ -32,6 +39,8 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    stopWord := "\n"
    if len(opts.StopPrompts) > 0 {

@@ -48,6 +57,7 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {

        stopWord := "\n"

@@ -65,6 +75,7 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
            return true
        })
        close(results)
        llm.Base.Unlock()
    }()

    return nil
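The rwkv loader derives the tokenizer path from the model path plus a fixed suffix, so the tokenizer must sit next to the model file. A small illustration of the derivation (the path itself is made up):

    modelFile := "/models/rwkv-4-raven.bin" // illustrative path
    tokenizer := filepath.Join(filepath.Dir(modelFile),
        filepath.Base(modelFile)+tokenizerSuffix)
    // tokenizer == "/models/rwkv-4-raven.bin.tokenizer.json"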
dolly backend (go-ggml-transformers):

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,27 @@ type Dolly struct {
}

func (llm *Dolly) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("dolly backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewDolly(opts.ModelFile)
    llm.dolly = model
    return err
}

func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()

    go func() {
        res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +48,7 @@ func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()

    return nil
transformers-falcon backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type Falcon struct {
}

func (llm *Falcon) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("transformers-falcon backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewFalcon(opts.ModelFile)
    llm.falcon = model
    return err
}

func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()

    return nil
gpt2 backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type GPT2 struct {
}

func (llm *GPT2) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("gpt2 backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.New(opts.ModelFile)
    llm.gpt2 = model
    return err
}

func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()
    return nil
}
gptj backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type GPTJ struct {
}

func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("gptj backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewGPTJ(opts.ModelFile)
    llm.gptj = model
    return err
}

func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()
    return nil
}
gptneox backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type GPTNeoX struct {
}

func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("gptneox backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewGPTNeoX(opts.ModelFile)
    llm.gptneox = model
    return err
}

func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()
    return nil
}
mpt backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,27 @@ type MPT struct {
}

func (llm *MPT) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("mpt backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewMPT(opts.ModelFile)
    llm.mpt = model
    return err
}

func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()

    return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +48,7 @@ func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()
    return nil
}
replit backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type Replit struct {
}

func (llm *Replit) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("replit backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewReplit(opts.ModelFile)
    llm.replit = model
    return err
}

func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()
    return nil
}
starcoder backend:

@@ -7,6 +7,7 @@ import (

    "github.com/go-skynet/LocalAI/pkg/grpc/base"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/rs/zerolog/log"

    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)

@@ -18,17 +19,26 @@ type Starcoder struct {
}

func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
        log.Warn().Msgf("starcoder backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
    }

    llm.Base.Lock()
    defer llm.Base.Unlock()
    model, err := transformers.NewStarcoder(opts.ModelFile)
    llm.starcoder = model
    return err
}

func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
    llm.Base.Lock()
    defer llm.Base.Unlock()
    return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

// fallback to Predict
func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
    llm.Base.Lock()
    go func() {
        res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -37,6 +47,7 @@ func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
        }
        results <- res
        close(results)
        llm.Base.Unlock()
    }()

    return nil