fix: drop racy code, refactor and group API schema (#931)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-08-20 14:04:45 +02:00 committed by GitHub
parent 28db83e17b
commit cc060a283d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
55 changed files with 239 additions and 317 deletions

View file

@ -0,0 +1,34 @@
package bert
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {
base.SingleThread
bert *bert.Bert
}
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}

View file

@ -0,0 +1,59 @@
package bloomz
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/bloomz.cpp"
)
type LLM struct {
base.SingleThread
bloomz *bloomz.Bloomz
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := bloomz.New(opts.ModelFile)
llm.bloomz = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
predictOptions := []bloomz.PredictOption{
bloomz.SetTemperature(float64(opts.Temperature)),
bloomz.SetTopP(float64(opts.TopP)),
bloomz.SetTopK(int(opts.TopK)),
bloomz.SetTokens(int(opts.Tokens)),
bloomz.SetThreads(int(opts.Threads)),
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,145 @@
package falcon
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
ggllm "github.com/mudler/go-ggllm.cpp"
)
type LLM struct {
base.SingleThread
falcon *ggllm.Falcon
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ggllmOpts := []ggllm.ModelOption{}
if opts.ContextSize != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))
}
// F16 doesn't seem to produce good output at all!
//if c.F16 {
// llamaOpts = append(llamaOpts, llama.EnableF16Memory)
//}
if opts.NGPULayers != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers)))
}
ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap))
ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU))
ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch)))
} else {
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
}
model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
llm.falcon = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
predictOptions := []ggllm.PredictOption{
ggllm.SetTemperature(float64(opts.Temperature)),
ggllm.SetTopP(float64(opts.TopP)),
ggllm.SetTopK(int(opts.TopK)),
ggllm.SetTokens(int(opts.Tokens)),
ggllm.SetThreads(int(opts.Threads)),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA)))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU)))
}
if opts.Debug {
predictOptions = append(predictOptions, ggllm.Debug)
}
predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty)))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch)))
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, ggllm.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty)))
predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock))
predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ)))
predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP)))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
if token == "<|endoftext|>" {
return true
}
results <- token
return true
}))
go func() {
_, err := llm.falcon.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}

View file

@ -0,0 +1,62 @@
package gpt4all
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LLM struct {
base.SingleThread
gpt4all *gpt4all.Model
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(float64(opts.Temperature)),
gpt4all.SetTopP(float64(opts.TopP)),
gpt4all.SetTopK(int(opts.TopK)),
gpt4all.SetTokens(int(opts.Tokens)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
go func() {
llm.gpt4all.SetTokenCallback(func(token string) bool {
results <- token
return true
})
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
llm.gpt4all.SetTokenCallback(nil)
close(results)
}()
return nil
}

View file

@ -0,0 +1,58 @@
package langchain
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/langchain"
)
type LLM struct {
base.Base
langchain *langchain.HuggingFace
model string
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
llm.model = opts.Model
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
o := []langchain.PredictOption{
langchain.SetModel(llm.model),
langchain.SetMaxTokens(int(opts.Tokens)),
langchain.SetTemperature(float64(opts.Temperature)),
langchain.SetStopWords(opts.StopPrompts),
}
pred, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
if err != nil {
return "", err
}
return pred.Completion, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
o := []langchain.PredictOption{
langchain.SetModel(llm.model),
langchain.SetMaxTokens(int(opts.Tokens)),
langchain.SetTemperature(float64(opts.Temperature)),
langchain.SetStopWords(opts.StopPrompts),
}
go func() {
res, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res.Completion
close(results)
}()
return nil
}

View file

@ -0,0 +1,216 @@
package llama
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
predictOptions := buildPredictOptions(opts)
l, tokens, err := llm.llama.TokenizeString(opts.Prompt, predictOptions...)
if err != nil {
return pb.TokenizationResponse{}, err
}
return pb.TokenizationResponse{
Length: l,
Tokens: tokens,
}, nil
}

View file

@ -0,0 +1,70 @@
package rwkv
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
const tokenizerSuffix = ".tokenizer.json"
type LLM struct {
base.SingleThread
rwkv *rwkv.RwkvState
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
modelPath := filepath.Dir(opts.ModelFile)
modelFile := filepath.Base(opts.ModelFile)
model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("could not load model")
}
llm.rwkv = model
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
return "", err
}
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
return response, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
fmt.Println("Error processing input: ", err)
return
}
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
results <- s
return true
})
close(results)
}()
return nil
}

View file

@ -0,0 +1,44 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Dolly struct {
base.SingleThread
dolly *transformers.Dolly
}
func (llm *Dolly) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewDolly(opts.ModelFile)
llm.dolly = model
return err
}
func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,43 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Falcon struct {
base.SingleThread
falcon *transformers.Falcon
}
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewFalcon(opts.ModelFile)
llm.falcon = model
return err
}
func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) {
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,42 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPT2 struct {
base.SingleThread
gpt2 *transformers.GPT2
}
func (llm *GPT2) Load(opts *pb.ModelOptions) error {
model, err := transformers.New(opts.ModelFile)
llm.gpt2 = model
return err
}
func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,42 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPTJ struct {
base.SingleThread
gptj *transformers.GPTJ
}
func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTJ(opts.ModelFile)
llm.gptj = model
return err
}
func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,42 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPTNeoX struct {
base.SingleThread
gptneox *transformers.GPTNeoX
}
func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTNeoX(opts.ModelFile)
llm.gptneox = model
return err
}
func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,42 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type MPT struct {
base.SingleThread
mpt *transformers.MPT
}
func (llm *MPT) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewMPT(opts.ModelFile)
llm.mpt = model
return err
}
func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,26 @@
package transformers
import (
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption {
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(float64(opts.Temperature)),
transformers.SetTopP(float64(opts.TopP)),
transformers.SetTopK(int(opts.TopK)),
transformers.SetTokens(int(opts.Tokens)),
transformers.SetThreads(int(opts.Threads)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch)))
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed)))
}
return predictOptions
}

View file

@ -0,0 +1,42 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Replit struct {
base.SingleThread
replit *transformers.Replit
}
func (llm *Replit) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewReplit(opts.ModelFile)
llm.replit = model
return err
}
func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

View file

@ -0,0 +1,43 @@
package transformers
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Starcoder struct {
base.SingleThread
starcoder *transformers.Starcoder
}
func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewStarcoder(opts.ModelFile)
llm.starcoder = model
return err
}
func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// fallback to Predict
func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}