mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 14:35:00 +00:00
feat: move other backends to grpc
This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
5dcfdbe51d
commit
1d0ed95a54
54 changed files with 3171 additions and 1712 deletions
42
pkg/grpc/base/base.go
Normal file
42
pkg/grpc/base/base.go
Normal file
|
@ -0,0 +1,42 @@
|
|||
package base
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
|
||||
)
|
||||
|
||||
type Base struct {
|
||||
}
|
||||
|
||||
func (llm *Base) Load(opts *pb.ModelOptions) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
|
||||
}
|
||||
|
||||
func (llm *Base) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return "", fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return []float32{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (api.Result, error) {
|
||||
return api.Result{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) TTS(*pb.TTSRequest) error {
|
||||
return fmt.Errorf("unimplemented")
|
||||
}
|
|
@ -7,6 +7,7 @@ import (
|
|||
"time"
|
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
)
|
||||
|
@ -28,7 +29,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
|
|||
return false
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewLLMClient(conn)
|
||||
client := pb.NewBackendClient(conn)
|
||||
|
||||
// The healthcheck call shouldn't take long time
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
|
@ -53,7 +54,7 @@ func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...
|
|||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewLLMClient(conn)
|
||||
client := pb.NewBackendClient(conn)
|
||||
|
||||
return client.Embedding(ctx, in, opts...)
|
||||
}
|
||||
|
@ -64,7 +65,7 @@ func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grp
|
|||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewLLMClient(conn)
|
||||
client := pb.NewBackendClient(conn)
|
||||
|
||||
return client.Predict(ctx, in, opts...)
|
||||
}
|
||||
|
@ -75,7 +76,7 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
|
|||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewLLMClient(conn)
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.LoadModel(ctx, in, opts...)
|
||||
}
|
||||
|
||||
|
@ -85,7 +86,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
|
|||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewLLMClient(conn)
|
||||
client := pb.NewBackendClient(conn)
|
||||
|
||||
stream, err := client.PredictStream(ctx, in, opts...)
|
||||
if err != nil {
|
||||
|
@ -107,3 +108,53 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.GenerateImage(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.TTS(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*api.Result, error) {
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
res, err := client.AudioTranscription(ctx, in, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tresult := &api.Result{}
|
||||
for _, s := range res.Segments {
|
||||
tks := []int{}
|
||||
for _, t := range s.Tokens {
|
||||
tks = append(tks, int(t))
|
||||
}
|
||||
tresult.Segments = append(tresult.Segments,
|
||||
api.Segment{
|
||||
Text: s.Text,
|
||||
Id: int(s.Id),
|
||||
Start: time.Duration(s.Start),
|
||||
End: time.Duration(s.End),
|
||||
Tokens: tks,
|
||||
})
|
||||
}
|
||||
tresult.Text = res.Text
|
||||
return tresult, err
|
||||
}
|
||||
|
|
33
pkg/grpc/image/stablediffusion.go
Normal file
33
pkg/grpc/image/stablediffusion.go
Normal file
|
@ -0,0 +1,33 @@
|
|||
package image
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
||||
)
|
||||
|
||||
type StableDiffusion struct {
|
||||
base.Base
|
||||
stablediffusion *stablediffusion.StableDiffusion
|
||||
}
|
||||
|
||||
func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
|
||||
var err error
|
||||
// Note: the Model here is a path to a directory containing the model files
|
||||
sd.stablediffusion, err = stablediffusion.New(opts.Model)
|
||||
return err
|
||||
}
|
||||
|
||||
func (sd *StableDiffusion) GenerateImage(opts *pb.GenerateImageRequest) error {
|
||||
return sd.stablediffusion.GenerateImage(
|
||||
int(opts.Height),
|
||||
int(opts.Width),
|
||||
int(opts.Mode),
|
||||
int(opts.Step),
|
||||
int(opts.Seed),
|
||||
opts.PositivePrompt,
|
||||
opts.NegativePrompt,
|
||||
opts.Dst)
|
||||
}
|
|
@ -2,11 +2,15 @@ package grpc
|
|||
|
||||
import (
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
|
||||
)
|
||||
|
||||
type LLM interface {
|
||||
Predict(*pb.PredictOptions) (string, error)
|
||||
PredictStream(*pb.PredictOptions, chan string)
|
||||
PredictStream(*pb.PredictOptions, chan string) error
|
||||
Load(*pb.ModelOptions) error
|
||||
Embeddings(*pb.PredictOptions) ([]float32, error)
|
||||
GenerateImage(*pb.GenerateImageRequest) error
|
||||
AudioTranscription(*pb.TranscriptRequest) (api.Result, error)
|
||||
TTS(*pb.TTSRequest) error
|
||||
}
|
||||
|
|
33
pkg/grpc/llm/bert/bert.go
Normal file
33
pkg/grpc/llm/bert/bert.go
Normal file
|
@ -0,0 +1,33 @@
|
|||
package bert
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type Embeddings struct {
|
||||
base.Base
|
||||
bert *bert.Bert
|
||||
}
|
||||
|
||||
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
||||
model, err := bert.New(opts.Model)
|
||||
llm.bert = model
|
||||
return err
|
||||
}
|
||||
|
||||
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
if len(opts.EmbeddingTokens) > 0 {
|
||||
tokens := []int{}
|
||||
for _, t := range opts.EmbeddingTokens {
|
||||
tokens = append(tokens, int(t))
|
||||
}
|
||||
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
|
||||
}
|
||||
|
||||
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
|
||||
}
|
59
pkg/grpc/llm/bloomz/bloomz.go
Normal file
59
pkg/grpc/llm/bloomz/bloomz.go
Normal file
|
@ -0,0 +1,59 @@
|
|||
package bloomz
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
bloomz *bloomz.Bloomz
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
model, err := bloomz.New(opts.Model)
|
||||
llm.bloomz = model
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
|
||||
predictOptions := []bloomz.PredictOption{
|
||||
bloomz.SetTemperature(float64(opts.Temperature)),
|
||||
bloomz.SetTopP(float64(opts.TopP)),
|
||||
bloomz.SetTopK(int(opts.TopK)),
|
||||
bloomz.SetTokens(int(opts.Tokens)),
|
||||
bloomz.SetThreads(int(opts.Threads)),
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
|
@ -5,12 +5,15 @@ package falcon
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
ggllm "github.com/mudler/go-ggllm.cpp"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
falcon *ggllm.Falcon
|
||||
}
|
||||
|
||||
|
@ -42,10 +45,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
|
||||
predictOptions := []ggllm.PredictOption{
|
||||
ggllm.SetTemperature(float64(opts.Temperature)),
|
||||
|
@ -122,7 +121,7 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|||
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
|
||||
|
@ -140,4 +139,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,11 +5,14 @@ package gpt4all
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
gpt4all *gpt4all.Model
|
||||
}
|
||||
|
||||
|
@ -39,7 +42,7 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|||
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
go func() {
|
||||
|
@ -54,8 +57,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
llm.gpt4all.SetTokenCallback(nil)
|
||||
close(results)
|
||||
}()
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return []float32{}, fmt.Errorf("not implemented")
|
||||
return nil
|
||||
}
|
||||
|
|
58
pkg/grpc/llm/langchain/langchain.go
Normal file
58
pkg/grpc/llm/langchain/langchain.go
Normal file
|
@ -0,0 +1,58 @@
|
|||
package langchain
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/langchain"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
langchain *langchain.HuggingFace
|
||||
model string
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
|
||||
llm.model = opts.Model
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
o := []langchain.PredictOption{
|
||||
langchain.SetModel(llm.model),
|
||||
langchain.SetMaxTokens(int(opts.Tokens)),
|
||||
langchain.SetTemperature(float64(opts.Temperature)),
|
||||
langchain.SetStopWords(opts.StopPrompts),
|
||||
}
|
||||
pred, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return pred.Completion, nil
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
o := []langchain.PredictOption{
|
||||
langchain.SetModel(llm.model),
|
||||
langchain.SetMaxTokens(int(opts.Tokens)),
|
||||
langchain.SetTemperature(float64(opts.Temperature)),
|
||||
langchain.SetStopWords(opts.StopPrompts),
|
||||
}
|
||||
go func() {
|
||||
res, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
results <- res.Completion
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
|
@ -5,11 +5,14 @@ package llama
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
llama *llama.LLama
|
||||
}
|
||||
|
||||
|
@ -133,7 +136,7 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|||
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
||||
|
@ -148,6 +151,8 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
|
|
71
pkg/grpc/llm/rwkv/rwkv.go
Normal file
71
pkg/grpc/llm/rwkv/rwkv.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
package rwkv
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
type LLM struct {
|
||||
base.Base
|
||||
|
||||
rwkv *rwkv.RwkvState
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
modelPath := filepath.Dir(opts.Model)
|
||||
modelFile := filepath.Base(opts.Model)
|
||||
model := rwkv.LoadFiles(opts.Model, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
|
||||
|
||||
if model == nil {
|
||||
return fmt.Errorf("could not load model")
|
||||
}
|
||||
llm.rwkv = model
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
fmt.Println("Error processing input: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
|
||||
results <- s
|
||||
return true
|
||||
})
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type Dolly struct {
|
||||
base.Base
|
||||
|
||||
dolly *transformers.Dolly
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *Dolly) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *Dolly) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,6 @@ func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
43
pkg/grpc/llm/transformers/falcon.go
Normal file
43
pkg/grpc/llm/transformers/falcon.go
Normal file
|
@ -0,0 +1,43 @@
|
|||
package transformers
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type Falcon struct {
|
||||
base.Base
|
||||
|
||||
falcon *transformers.Falcon
|
||||
}
|
||||
|
||||
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
|
||||
model, err := transformers.NewFalcon(opts.Model)
|
||||
llm.falcon = model
|
||||
return err
|
||||
}
|
||||
|
||||
func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type GPT2 struct {
|
||||
base.Base
|
||||
|
||||
gpt2 *transformers.GPT2
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *GPT2) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *GPT2) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,5 @@ func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type GPTJ struct {
|
||||
base.Base
|
||||
|
||||
gptj *transformers.GPTJ
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *GPTJ) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,5 @@ func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type GPTNeoX struct {
|
||||
base.Base
|
||||
|
||||
gptneox *transformers.GPTNeoX
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *GPTNeoX) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,5 @@ func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string)
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type MPT struct {
|
||||
base.Base
|
||||
|
||||
mpt *transformers.MPT
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *MPT) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *MPT) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,5 @@ func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type Replit struct {
|
||||
base.Base
|
||||
|
||||
replit *transformers.Replit
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *Replit) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *Replit) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,5 @@ func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) {
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,12 +5,15 @@ package transformers
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
)
|
||||
|
||||
type Starcoder struct {
|
||||
base.Base
|
||||
|
||||
starcoder *transformers.Starcoder
|
||||
}
|
||||
|
||||
|
@ -20,16 +23,12 @@ func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (llm *Starcoder) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) {
|
||||
func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
|
@ -39,4 +38,6 @@ func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string
|
|||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
1458
pkg/grpc/proto/backend.pb.go
Normal file
1458
pkg/grpc/proto/backend.pb.go
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2,17 +2,20 @@ syntax = "proto3";
|
|||
|
||||
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
|
||||
option java_multiple_files = true;
|
||||
option java_package = "io.skynet.localai.llmserver";
|
||||
option java_outer_classname = "LLMServer";
|
||||
option java_package = "io.skynet.localai.backend";
|
||||
option java_outer_classname = "LocalAIBackend";
|
||||
|
||||
package llm;
|
||||
package backend;
|
||||
|
||||
service LLM {
|
||||
service Backend {
|
||||
rpc Health(HealthMessage) returns (Reply) {}
|
||||
rpc Predict(PredictOptions) returns (Reply) {}
|
||||
rpc LoadModel(ModelOptions) returns (Result) {}
|
||||
rpc PredictStream(PredictOptions) returns (stream Reply) {}
|
||||
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
|
||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||
rpc TTS(TTSRequest) returns (Result) {}
|
||||
}
|
||||
|
||||
message HealthMessage {}
|
||||
|
@ -87,4 +90,40 @@ message Result {
|
|||
|
||||
message EmbeddingResult {
|
||||
repeated float embeddings = 1;
|
||||
}
|
||||
}
|
||||
|
||||
message TranscriptRequest {
|
||||
string dst = 2;
|
||||
string language = 3;
|
||||
uint32 threads = 4;
|
||||
}
|
||||
|
||||
message TranscriptResult {
|
||||
repeated TranscriptSegment segments = 1;
|
||||
string text = 2;
|
||||
}
|
||||
|
||||
message TranscriptSegment {
|
||||
int32 id = 1;
|
||||
int64 start = 2;
|
||||
int64 end = 3;
|
||||
string text = 4;
|
||||
repeated int32 tokens = 5;
|
||||
}
|
||||
|
||||
message GenerateImageRequest {
|
||||
int32 height = 1;
|
||||
int32 width = 2;
|
||||
int32 mode = 3;
|
||||
int32 step = 4;
|
||||
int32 seed = 5;
|
||||
string positive_prompt = 6;
|
||||
string negative_prompt = 7;
|
||||
string dst = 8;
|
||||
}
|
||||
|
||||
message TTSRequest {
|
||||
string text = 1;
|
||||
string model = 2;
|
||||
string dst = 3;
|
||||
}
|
385
pkg/grpc/proto/backend_grpc.pb.go
Normal file
385
pkg/grpc/proto/backend_grpc.pb.go
Normal file
|
@ -0,0 +1,385 @@
|
|||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||
// versions:
|
||||
// - protoc-gen-go-grpc v1.2.0
|
||||
// - protoc v3.15.8
|
||||
// source: pkg/grpc/proto/backend.proto
|
||||
|
||||
package proto
|
||||
|
||||
import (
|
||||
context "context"
|
||||
grpc "google.golang.org/grpc"
|
||||
codes "google.golang.org/grpc/codes"
|
||||
status "google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// This is a compile-time assertion to ensure that this generated file
|
||||
// is compatible with the grpc package it is being compiled against.
|
||||
// Requires gRPC-Go v1.32.0 or later.
|
||||
const _ = grpc.SupportPackageIsVersion7
|
||||
|
||||
// BackendClient is the client API for Backend service.
|
||||
//
|
||||
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
||||
type BackendClient interface {
|
||||
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
|
||||
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
|
||||
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
|
||||
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
|
||||
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
|
||||
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
|
||||
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
|
||||
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
|
||||
}
|
||||
|
||||
type backendClient struct {
|
||||
cc grpc.ClientConnInterface
|
||||
}
|
||||
|
||||
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
|
||||
return &backendClient{cc}
|
||||
}
|
||||
|
||||
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
|
||||
out := new(Reply)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
|
||||
out := new(Reply)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
|
||||
out := new(Result)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
|
||||
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
x := &backendPredictStreamClient{stream}
|
||||
if err := x.ClientStream.SendMsg(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := x.ClientStream.CloseSend(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return x, nil
|
||||
}
|
||||
|
||||
type Backend_PredictStreamClient interface {
|
||||
Recv() (*Reply, error)
|
||||
grpc.ClientStream
|
||||
}
|
||||
|
||||
type backendPredictStreamClient struct {
|
||||
grpc.ClientStream
|
||||
}
|
||||
|
||||
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
|
||||
m := new(Reply)
|
||||
if err := x.ClientStream.RecvMsg(m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
|
||||
out := new(EmbeddingResult)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||
out := new(Result)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
|
||||
out := new(TranscriptResult)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||
out := new(Result)
|
||||
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// BackendServer is the server API for Backend service.
|
||||
// All implementations must embed UnimplementedBackendServer
|
||||
// for forward compatibility
|
||||
type BackendServer interface {
|
||||
Health(context.Context, *HealthMessage) (*Reply, error)
|
||||
Predict(context.Context, *PredictOptions) (*Reply, error)
|
||||
LoadModel(context.Context, *ModelOptions) (*Result, error)
|
||||
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
|
||||
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
|
||||
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
|
||||
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
|
||||
TTS(context.Context, *TTSRequest) (*Result, error)
|
||||
mustEmbedUnimplementedBackendServer()
|
||||
}
|
||||
|
||||
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
|
||||
type UnimplementedBackendServer struct {
|
||||
}
|
||||
|
||||
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
|
||||
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
|
||||
}
|
||||
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
|
||||
|
||||
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
|
||||
// Use of this interface is not recommended, as added methods to BackendServer will
|
||||
// result in compilation errors.
|
||||
type UnsafeBackendServer interface {
|
||||
mustEmbedUnimplementedBackendServer()
|
||||
}
|
||||
|
||||
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
|
||||
s.RegisterService(&Backend_ServiceDesc, srv)
|
||||
}
|
||||
|
||||
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(HealthMessage)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).Health(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/Health",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(PredictOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).Predict(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/Predict",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(ModelOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).LoadModel(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/LoadModel",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
|
||||
m := new(PredictOptions)
|
||||
if err := stream.RecvMsg(m); err != nil {
|
||||
return err
|
||||
}
|
||||
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
|
||||
}
|
||||
|
||||
type Backend_PredictStreamServer interface {
|
||||
Send(*Reply) error
|
||||
grpc.ServerStream
|
||||
}
|
||||
|
||||
type backendPredictStreamServer struct {
|
||||
grpc.ServerStream
|
||||
}
|
||||
|
||||
func (x *backendPredictStreamServer) Send(m *Reply) error {
|
||||
return x.ServerStream.SendMsg(m)
|
||||
}
|
||||
|
||||
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(PredictOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).Embedding(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/Embedding",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(GenerateImageRequest)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).GenerateImage(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/GenerateImage",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(TranscriptRequest)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).AudioTranscription(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/AudioTranscription",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(TTSRequest)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(BackendServer).TTS(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/backend.Backend/TTS",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
|
||||
// It's only intended for direct use with grpc.RegisterService,
|
||||
// and not to be introspected or modified (even as a copy)
|
||||
var Backend_ServiceDesc = grpc.ServiceDesc{
|
||||
ServiceName: "backend.Backend",
|
||||
HandlerType: (*BackendServer)(nil),
|
||||
Methods: []grpc.MethodDesc{
|
||||
{
|
||||
MethodName: "Health",
|
||||
Handler: _Backend_Health_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "Predict",
|
||||
Handler: _Backend_Predict_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "LoadModel",
|
||||
Handler: _Backend_LoadModel_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "Embedding",
|
||||
Handler: _Backend_Embedding_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "GenerateImage",
|
||||
Handler: _Backend_GenerateImage_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "AudioTranscription",
|
||||
Handler: _Backend_AudioTranscription_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "TTS",
|
||||
Handler: _Backend_TTS_Handler,
|
||||
},
|
||||
},
|
||||
Streams: []grpc.StreamDesc{
|
||||
{
|
||||
StreamName: "PredictStream",
|
||||
Handler: _Backend_PredictStream_Handler,
|
||||
ServerStreams: true,
|
||||
},
|
||||
},
|
||||
Metadata: "pkg/grpc/proto/backend.proto",
|
||||
}
|
|
@ -1,969 +0,0 @@
|
|||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.26.0
|
||||
// protoc v3.15.8
|
||||
// source: pkg/grpc/proto/llmserver.proto
|
||||
|
||||
package proto
|
||||
|
||||
import (
|
||||
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||
reflect "reflect"
|
||||
sync "sync"
|
||||
)
|
||||
|
||||
const (
|
||||
// Verify that this generated code is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||
)
|
||||
|
||||
type HealthMessage struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
}
|
||||
|
||||
func (x *HealthMessage) Reset() {
|
||||
*x = HealthMessage{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *HealthMessage) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*HealthMessage) ProtoMessage() {}
|
||||
|
||||
func (x *HealthMessage) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead.
|
||||
func (*HealthMessage) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
// The request message containing the user's name.
|
||||
type PredictOptions struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"`
|
||||
Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"`
|
||||
Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"`
|
||||
Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"`
|
||||
TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"`
|
||||
Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"`
|
||||
Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"`
|
||||
NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"`
|
||||
Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"`
|
||||
Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"`
|
||||
F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"`
|
||||
DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"`
|
||||
StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"`
|
||||
IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"`
|
||||
TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"`
|
||||
TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"`
|
||||
FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"`
|
||||
PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"`
|
||||
Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"`
|
||||
MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"`
|
||||
MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"`
|
||||
PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"`
|
||||
LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"`
|
||||
MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"`
|
||||
MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"`
|
||||
PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"`
|
||||
PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"`
|
||||
Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"`
|
||||
MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
|
||||
TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
|
||||
TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"`
|
||||
PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"`
|
||||
Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"`
|
||||
EmbeddingTokens []int32 `protobuf:"varint,35,rep,packed,name=EmbeddingTokens,proto3" json:"EmbeddingTokens,omitempty"`
|
||||
Embeddings string `protobuf:"bytes,36,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
|
||||
}
|
||||
|
||||
func (x *PredictOptions) Reset() {
|
||||
*x = PredictOptions{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *PredictOptions) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*PredictOptions) ProtoMessage() {}
|
||||
|
||||
func (x *PredictOptions) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead.
|
||||
func (*PredictOptions) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{1}
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPrompt() string {
|
||||
if x != nil {
|
||||
return x.Prompt
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetSeed() int32 {
|
||||
if x != nil {
|
||||
return x.Seed
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetThreads() int32 {
|
||||
if x != nil {
|
||||
return x.Threads
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTokens() int32 {
|
||||
if x != nil {
|
||||
return x.Tokens
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTopK() int32 {
|
||||
if x != nil {
|
||||
return x.TopK
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetRepeat() int32 {
|
||||
if x != nil {
|
||||
return x.Repeat
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetBatch() int32 {
|
||||
if x != nil {
|
||||
return x.Batch
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetNKeep() int32 {
|
||||
if x != nil {
|
||||
return x.NKeep
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTemperature() float32 {
|
||||
if x != nil {
|
||||
return x.Temperature
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPenalty() float32 {
|
||||
if x != nil {
|
||||
return x.Penalty
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetF16KV() bool {
|
||||
if x != nil {
|
||||
return x.F16KV
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetDebugMode() bool {
|
||||
if x != nil {
|
||||
return x.DebugMode
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetStopPrompts() []string {
|
||||
if x != nil {
|
||||
return x.StopPrompts
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetIgnoreEOS() bool {
|
||||
if x != nil {
|
||||
return x.IgnoreEOS
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTailFreeSamplingZ() float32 {
|
||||
if x != nil {
|
||||
return x.TailFreeSamplingZ
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTypicalP() float32 {
|
||||
if x != nil {
|
||||
return x.TypicalP
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetFrequencyPenalty() float32 {
|
||||
if x != nil {
|
||||
return x.FrequencyPenalty
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPresencePenalty() float32 {
|
||||
if x != nil {
|
||||
return x.PresencePenalty
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMirostat() int32 {
|
||||
if x != nil {
|
||||
return x.Mirostat
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMirostatETA() float32 {
|
||||
if x != nil {
|
||||
return x.MirostatETA
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMirostatTAU() float32 {
|
||||
if x != nil {
|
||||
return x.MirostatTAU
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPenalizeNL() bool {
|
||||
if x != nil {
|
||||
return x.PenalizeNL
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetLogitBias() string {
|
||||
if x != nil {
|
||||
return x.LogitBias
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMLock() bool {
|
||||
if x != nil {
|
||||
return x.MLock
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMMap() bool {
|
||||
if x != nil {
|
||||
return x.MMap
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPromptCacheAll() bool {
|
||||
if x != nil {
|
||||
return x.PromptCacheAll
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPromptCacheRO() bool {
|
||||
if x != nil {
|
||||
return x.PromptCacheRO
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetGrammar() string {
|
||||
if x != nil {
|
||||
return x.Grammar
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetMainGPU() string {
|
||||
if x != nil {
|
||||
return x.MainGPU
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTensorSplit() string {
|
||||
if x != nil {
|
||||
return x.TensorSplit
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetTopP() float32 {
|
||||
if x != nil {
|
||||
return x.TopP
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetPromptCachePath() string {
|
||||
if x != nil {
|
||||
return x.PromptCachePath
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetDebug() bool {
|
||||
if x != nil {
|
||||
return x.Debug
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetEmbeddingTokens() []int32 {
|
||||
if x != nil {
|
||||
return x.EmbeddingTokens
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *PredictOptions) GetEmbeddings() string {
|
||||
if x != nil {
|
||||
return x.Embeddings
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// The response message containing the result
|
||||
type Reply struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func (x *Reply) Reset() {
|
||||
*x = Reply{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *Reply) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*Reply) ProtoMessage() {}
|
||||
|
||||
func (x *Reply) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use Reply.ProtoReflect.Descriptor instead.
|
||||
func (*Reply) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{2}
|
||||
}
|
||||
|
||||
func (x *Reply) GetMessage() string {
|
||||
if x != nil {
|
||||
return x.Message
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type ModelOptions struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"`
|
||||
ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"`
|
||||
Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"`
|
||||
NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"`
|
||||
F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"`
|
||||
MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"`
|
||||
MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"`
|
||||
VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"`
|
||||
LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"`
|
||||
Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
|
||||
NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"`
|
||||
NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"`
|
||||
MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
|
||||
TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
|
||||
Threads int32 `protobuf:"varint,15,opt,name=Threads,proto3" json:"Threads,omitempty"`
|
||||
LibrarySearchPath string `protobuf:"bytes,16,opt,name=LibrarySearchPath,proto3" json:"LibrarySearchPath,omitempty"`
|
||||
}
|
||||
|
||||
func (x *ModelOptions) Reset() {
|
||||
*x = ModelOptions{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *ModelOptions) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*ModelOptions) ProtoMessage() {}
|
||||
|
||||
func (x *ModelOptions) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead.
|
||||
func (*ModelOptions) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{3}
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetModel() string {
|
||||
if x != nil {
|
||||
return x.Model
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetContextSize() int32 {
|
||||
if x != nil {
|
||||
return x.ContextSize
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetSeed() int32 {
|
||||
if x != nil {
|
||||
return x.Seed
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetNBatch() int32 {
|
||||
if x != nil {
|
||||
return x.NBatch
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetF16Memory() bool {
|
||||
if x != nil {
|
||||
return x.F16Memory
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetMLock() bool {
|
||||
if x != nil {
|
||||
return x.MLock
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetMMap() bool {
|
||||
if x != nil {
|
||||
return x.MMap
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetVocabOnly() bool {
|
||||
if x != nil {
|
||||
return x.VocabOnly
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetLowVRAM() bool {
|
||||
if x != nil {
|
||||
return x.LowVRAM
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetEmbeddings() bool {
|
||||
if x != nil {
|
||||
return x.Embeddings
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetNUMA() bool {
|
||||
if x != nil {
|
||||
return x.NUMA
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetNGPULayers() int32 {
|
||||
if x != nil {
|
||||
return x.NGPULayers
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetMainGPU() string {
|
||||
if x != nil {
|
||||
return x.MainGPU
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetTensorSplit() string {
|
||||
if x != nil {
|
||||
return x.TensorSplit
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetThreads() int32 {
|
||||
if x != nil {
|
||||
return x.Threads
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *ModelOptions) GetLibrarySearchPath() string {
|
||||
if x != nil {
|
||||
return x.LibrarySearchPath
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type Result struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
|
||||
Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"`
|
||||
}
|
||||
|
||||
func (x *Result) Reset() {
|
||||
*x = Result{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *Result) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*Result) ProtoMessage() {}
|
||||
|
||||
func (x *Result) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use Result.ProtoReflect.Descriptor instead.
|
||||
func (*Result) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{4}
|
||||
}
|
||||
|
||||
func (x *Result) GetMessage() string {
|
||||
if x != nil {
|
||||
return x.Message
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *Result) GetSuccess() bool {
|
||||
if x != nil {
|
||||
return x.Success
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type EmbeddingResult struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Embeddings []float32 `protobuf:"fixed32,1,rep,packed,name=embeddings,proto3" json:"embeddings,omitempty"`
|
||||
}
|
||||
|
||||
func (x *EmbeddingResult) Reset() {
|
||||
*x = EmbeddingResult{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[5]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *EmbeddingResult) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*EmbeddingResult) ProtoMessage() {}
|
||||
|
||||
func (x *EmbeddingResult) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[5]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use EmbeddingResult.ProtoReflect.Descriptor instead.
|
||||
func (*EmbeddingResult) Descriptor() ([]byte, []int) {
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{5}
|
||||
}
|
||||
|
||||
func (x *EmbeddingResult) GetEmbeddings() []float32 {
|
||||
if x != nil {
|
||||
return x.Embeddings
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var File_pkg_grpc_proto_llmserver_proto protoreflect.FileDescriptor
|
||||
|
||||
var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{
|
||||
0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||
0x2f, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||
0x12, 0x03, 0x6c, 0x6c, 0x6d, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d,
|
||||
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xa0, 0x08, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69,
|
||||
0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f,
|
||||
0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||
0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73,
|
||||
0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12,
|
||||
0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||
0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18,
|
||||
0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52,
|
||||
0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70,
|
||||
0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01,
|
||||
0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65,
|
||||
0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12,
|
||||
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09,
|
||||
0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72,
|
||||
0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01,
|
||||
0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46,
|
||||
0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b,
|
||||
0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c,
|
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12,
|
||||
0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d,
|
||||
0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
||||
0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e,
|
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12,
|
||||
0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c,
|
||||
0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c,
|
||||
0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a,
|
||||
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52,
|
||||
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65,
|
||||
0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20,
|
||||
0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65,
|
||||
0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63,
|
||||
0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f,
|
||||
0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12,
|
||||
0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28,
|
||||
0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d,
|
||||
0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02,
|
||||
0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a,
|
||||
0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01,
|
||||
0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12,
|
||||
0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20,
|
||||
0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12,
|
||||
0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01,
|
||||
0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x14, 0x0a,
|
||||
0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c,
|
||||
0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28,
|
||||
0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52,
|
||||
0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12,
|
||||
0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f,
|
||||
0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61,
|
||||
0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, 0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72,
|
||||
0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12,
|
||||
0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09,
|
||||
0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e,
|
||||
0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b,
|
||||
0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54,
|
||||
0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, 0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12,
|
||||
0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61,
|
||||
0x74, 0x68, 0x18, 0x21, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
||||
0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62,
|
||||
0x75, 0x67, 0x18, 0x22, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x12,
|
||||
0x28, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65,
|
||||
0x6e, 0x73, 0x18, 0x23, 0x20, 0x03, 0x28, 0x05, 0x52, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64,
|
||||
0x69, 0x6e, 0x67, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62,
|
||||
0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45,
|
||||
0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70,
|
||||
0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20,
|
||||
0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xca, 0x03, 0x0a,
|
||||
0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a,
|
||||
0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f,
|
||||
0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69,
|
||||
0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78,
|
||||
0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20,
|
||||
0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61,
|
||||
0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63,
|
||||
0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05,
|
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12,
|
||||
0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05,
|
||||
0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20,
|
||||
0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63,
|
||||
0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f,
|
||||
0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52,
|
||||
0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41,
|
||||
0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18,
|
||||
0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67,
|
||||
0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52,
|
||||
0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79,
|
||||
0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c,
|
||||
0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55,
|
||||
0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12,
|
||||
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e,
|
||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69,
|
||||
0x74, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01,
|
||||
0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c,
|
||||
0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68,
|
||||
0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53,
|
||||
0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73,
|
||||
0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01,
|
||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a,
|
||||
0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07,
|
||||
0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64,
|
||||
0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d,
|
||||
0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a,
|
||||
0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x32, 0xfe, 0x01, 0x0a, 0x03, 0x4c,
|
||||
0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x12, 0x2e, 0x6c,
|
||||
0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65,
|
||||
0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2c,
|
||||
0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e,
|
||||
0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a,
|
||||
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x09,
|
||||
0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x11, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e,
|
||||
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0b, 0x2e, 0x6c,
|
||||
0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50,
|
||||
0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x13, 0x2e, 0x6c,
|
||||
0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e,
|
||||
0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30,
|
||||
0x01, 0x12, 0x38, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x13,
|
||||
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69,
|
||||
0x6f, 0x6e, 0x73, 0x1a, 0x14, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64,
|
||||
0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x57, 0x0a, 0x1b, 0x69,
|
||||
0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69,
|
||||
0x2e, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53,
|
||||
0x65, 0x72, 0x76, 0x65, 0x72, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e,
|
||||
0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f,
|
||||
0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70,
|
||||
0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
||||
}
|
||||
|
||||
var (
|
||||
file_pkg_grpc_proto_llmserver_proto_rawDescOnce sync.Once
|
||||
file_pkg_grpc_proto_llmserver_proto_rawDescData = file_pkg_grpc_proto_llmserver_proto_rawDesc
|
||||
)
|
||||
|
||||
func file_pkg_grpc_proto_llmserver_proto_rawDescGZIP() []byte {
|
||||
file_pkg_grpc_proto_llmserver_proto_rawDescOnce.Do(func() {
|
||||
file_pkg_grpc_proto_llmserver_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_grpc_proto_llmserver_proto_rawDescData)
|
||||
})
|
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_pkg_grpc_proto_llmserver_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
|
||||
var file_pkg_grpc_proto_llmserver_proto_goTypes = []interface{}{
|
||||
(*HealthMessage)(nil), // 0: llm.HealthMessage
|
||||
(*PredictOptions)(nil), // 1: llm.PredictOptions
|
||||
(*Reply)(nil), // 2: llm.Reply
|
||||
(*ModelOptions)(nil), // 3: llm.ModelOptions
|
||||
(*Result)(nil), // 4: llm.Result
|
||||
(*EmbeddingResult)(nil), // 5: llm.EmbeddingResult
|
||||
}
|
||||
var file_pkg_grpc_proto_llmserver_proto_depIdxs = []int32{
|
||||
0, // 0: llm.LLM.Health:input_type -> llm.HealthMessage
|
||||
1, // 1: llm.LLM.Predict:input_type -> llm.PredictOptions
|
||||
3, // 2: llm.LLM.LoadModel:input_type -> llm.ModelOptions
|
||||
1, // 3: llm.LLM.PredictStream:input_type -> llm.PredictOptions
|
||||
1, // 4: llm.LLM.Embedding:input_type -> llm.PredictOptions
|
||||
2, // 5: llm.LLM.Health:output_type -> llm.Reply
|
||||
2, // 6: llm.LLM.Predict:output_type -> llm.Reply
|
||||
4, // 7: llm.LLM.LoadModel:output_type -> llm.Result
|
||||
2, // 8: llm.LLM.PredictStream:output_type -> llm.Reply
|
||||
5, // 9: llm.LLM.Embedding:output_type -> llm.EmbeddingResult
|
||||
5, // [5:10] is the sub-list for method output_type
|
||||
0, // [0:5] is the sub-list for method input_type
|
||||
0, // [0:0] is the sub-list for extension type_name
|
||||
0, // [0:0] is the sub-list for extension extendee
|
||||
0, // [0:0] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_pkg_grpc_proto_llmserver_proto_init() }
|
||||
func file_pkg_grpc_proto_llmserver_proto_init() {
|
||||
if File_pkg_grpc_proto_llmserver_proto != nil {
|
||||
return
|
||||
}
|
||||
if !protoimpl.UnsafeEnabled {
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*HealthMessage); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*PredictOptions); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*Reply); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*ModelOptions); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*Result); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
|
||||
switch v := v.(*EmbeddingResult); i {
|
||||
case 0:
|
||||
return &v.state
|
||||
case 1:
|
||||
return &v.sizeCache
|
||||
case 2:
|
||||
return &v.unknownFields
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
type x struct{}
|
||||
out := protoimpl.TypeBuilder{
|
||||
File: protoimpl.DescBuilder{
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: file_pkg_grpc_proto_llmserver_proto_rawDesc,
|
||||
NumEnums: 0,
|
||||
NumMessages: 6,
|
||||
NumExtensions: 0,
|
||||
NumServices: 1,
|
||||
},
|
||||
GoTypes: file_pkg_grpc_proto_llmserver_proto_goTypes,
|
||||
DependencyIndexes: file_pkg_grpc_proto_llmserver_proto_depIdxs,
|
||||
MessageInfos: file_pkg_grpc_proto_llmserver_proto_msgTypes,
|
||||
}.Build()
|
||||
File_pkg_grpc_proto_llmserver_proto = out.File
|
||||
file_pkg_grpc_proto_llmserver_proto_rawDesc = nil
|
||||
file_pkg_grpc_proto_llmserver_proto_goTypes = nil
|
||||
file_pkg_grpc_proto_llmserver_proto_depIdxs = nil
|
||||
}
|
|
@ -1,277 +0,0 @@
|
|||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||
// versions:
|
||||
// - protoc-gen-go-grpc v1.2.0
|
||||
// - protoc v3.15.8
|
||||
// source: pkg/grpc/proto/llmserver.proto
|
||||
|
||||
package proto
|
||||
|
||||
import (
|
||||
context "context"
|
||||
grpc "google.golang.org/grpc"
|
||||
codes "google.golang.org/grpc/codes"
|
||||
status "google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// This is a compile-time assertion to ensure that this generated file
|
||||
// is compatible with the grpc package it is being compiled against.
|
||||
// Requires gRPC-Go v1.32.0 or later.
|
||||
const _ = grpc.SupportPackageIsVersion7
|
||||
|
||||
// LLMClient is the client API for LLM service.
|
||||
//
|
||||
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
||||
type LLMClient interface {
|
||||
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
|
||||
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
|
||||
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
|
||||
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error)
|
||||
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
|
||||
}
|
||||
|
||||
type lLMClient struct {
|
||||
cc grpc.ClientConnInterface
|
||||
}
|
||||
|
||||
func NewLLMClient(cc grpc.ClientConnInterface) LLMClient {
|
||||
return &lLMClient{cc}
|
||||
}
|
||||
|
||||
func (c *lLMClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
|
||||
out := new(Reply)
|
||||
err := c.cc.Invoke(ctx, "/llm.LLM/Health", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *lLMClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
|
||||
out := new(Reply)
|
||||
err := c.cc.Invoke(ctx, "/llm.LLM/Predict", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *lLMClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
|
||||
out := new(Result)
|
||||
err := c.cc.Invoke(ctx, "/llm.LLM/LoadModel", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *lLMClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) {
|
||||
stream, err := c.cc.NewStream(ctx, &LLM_ServiceDesc.Streams[0], "/llm.LLM/PredictStream", opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
x := &lLMPredictStreamClient{stream}
|
||||
if err := x.ClientStream.SendMsg(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := x.ClientStream.CloseSend(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return x, nil
|
||||
}
|
||||
|
||||
type LLM_PredictStreamClient interface {
|
||||
Recv() (*Reply, error)
|
||||
grpc.ClientStream
|
||||
}
|
||||
|
||||
type lLMPredictStreamClient struct {
|
||||
grpc.ClientStream
|
||||
}
|
||||
|
||||
func (x *lLMPredictStreamClient) Recv() (*Reply, error) {
|
||||
m := new(Reply)
|
||||
if err := x.ClientStream.RecvMsg(m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (c *lLMClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
|
||||
out := new(EmbeddingResult)
|
||||
err := c.cc.Invoke(ctx, "/llm.LLM/Embedding", in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// LLMServer is the server API for LLM service.
|
||||
// All implementations must embed UnimplementedLLMServer
|
||||
// for forward compatibility
|
||||
type LLMServer interface {
|
||||
Health(context.Context, *HealthMessage) (*Reply, error)
|
||||
Predict(context.Context, *PredictOptions) (*Reply, error)
|
||||
LoadModel(context.Context, *ModelOptions) (*Result, error)
|
||||
PredictStream(*PredictOptions, LLM_PredictStreamServer) error
|
||||
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
|
||||
mustEmbedUnimplementedLLMServer()
|
||||
}
|
||||
|
||||
// UnimplementedLLMServer must be embedded to have forward compatible implementations.
|
||||
type UnimplementedLLMServer struct {
|
||||
}
|
||||
|
||||
func (UnimplementedLLMServer) Health(context.Context, *HealthMessage) (*Reply, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
|
||||
}
|
||||
func (UnimplementedLLMServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
|
||||
}
|
||||
func (UnimplementedLLMServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
|
||||
}
|
||||
func (UnimplementedLLMServer) PredictStream(*PredictOptions, LLM_PredictStreamServer) error {
|
||||
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
|
||||
}
|
||||
func (UnimplementedLLMServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
|
||||
}
|
||||
func (UnimplementedLLMServer) mustEmbedUnimplementedLLMServer() {}
|
||||
|
||||
// UnsafeLLMServer may be embedded to opt out of forward compatibility for this service.
|
||||
// Use of this interface is not recommended, as added methods to LLMServer will
|
||||
// result in compilation errors.
|
||||
type UnsafeLLMServer interface {
|
||||
mustEmbedUnimplementedLLMServer()
|
||||
}
|
||||
|
||||
func RegisterLLMServer(s grpc.ServiceRegistrar, srv LLMServer) {
|
||||
s.RegisterService(&LLM_ServiceDesc, srv)
|
||||
}
|
||||
|
||||
func _LLM_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(HealthMessage)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(LLMServer).Health(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/llm.LLM/Health",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(LLMServer).Health(ctx, req.(*HealthMessage))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _LLM_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(PredictOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(LLMServer).Predict(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/llm.LLM/Predict",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(LLMServer).Predict(ctx, req.(*PredictOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _LLM_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(ModelOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(LLMServer).LoadModel(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/llm.LLM/LoadModel",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(LLMServer).LoadModel(ctx, req.(*ModelOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _LLM_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
|
||||
m := new(PredictOptions)
|
||||
if err := stream.RecvMsg(m); err != nil {
|
||||
return err
|
||||
}
|
||||
return srv.(LLMServer).PredictStream(m, &lLMPredictStreamServer{stream})
|
||||
}
|
||||
|
||||
type LLM_PredictStreamServer interface {
|
||||
Send(*Reply) error
|
||||
grpc.ServerStream
|
||||
}
|
||||
|
||||
type lLMPredictStreamServer struct {
|
||||
grpc.ServerStream
|
||||
}
|
||||
|
||||
func (x *lLMPredictStreamServer) Send(m *Reply) error {
|
||||
return x.ServerStream.SendMsg(m)
|
||||
}
|
||||
|
||||
func _LLM_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(PredictOptions)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(LLMServer).Embedding(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: "/llm.LLM/Embedding",
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(LLMServer).Embedding(ctx, req.(*PredictOptions))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
// LLM_ServiceDesc is the grpc.ServiceDesc for LLM service.
|
||||
// It's only intended for direct use with grpc.RegisterService,
|
||||
// and not to be introspected or modified (even as a copy)
|
||||
var LLM_ServiceDesc = grpc.ServiceDesc{
|
||||
ServiceName: "llm.LLM",
|
||||
HandlerType: (*LLMServer)(nil),
|
||||
Methods: []grpc.MethodDesc{
|
||||
{
|
||||
MethodName: "Health",
|
||||
Handler: _LLM_Health_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "Predict",
|
||||
Handler: _LLM_Predict_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "LoadModel",
|
||||
Handler: _LLM_LoadModel_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "Embedding",
|
||||
Handler: _LLM_Embedding_Handler,
|
||||
},
|
||||
},
|
||||
Streams: []grpc.StreamDesc{
|
||||
{
|
||||
StreamName: "PredictStream",
|
||||
Handler: _LLM_PredictStream_Handler,
|
||||
ServerStreams: true,
|
||||
},
|
||||
},
|
||||
Metadata: "pkg/grpc/proto/llmserver.proto",
|
||||
}
|
|
@ -21,7 +21,7 @@ import (
|
|||
|
||||
// server is used to implement helloworld.GreeterServer.
|
||||
type server struct {
|
||||
pb.UnimplementedLLMServer
|
||||
pb.UnimplementedBackendServer
|
||||
llm LLM
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,48 @@ func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply,
|
|||
return &pb.Reply{Message: result}, err
|
||||
}
|
||||
|
||||
func (s *server) PredictStream(in *pb.PredictOptions, stream pb.LLM_PredictStreamServer) error {
|
||||
func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest) (*pb.Result, error) {
|
||||
err := s.llm.GenerateImage(in)
|
||||
if err != nil {
|
||||
return &pb.Result{Message: fmt.Sprintf("Error generating image: %s", err.Error()), Success: false}, err
|
||||
}
|
||||
return &pb.Result{Message: "Image generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
|
||||
err := s.llm.TTS(in)
|
||||
if err != nil {
|
||||
return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
|
||||
}
|
||||
return &pb.Result{Message: "Audio generated", Success: true}, nil
|
||||
}
|
||||
|
||||
func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest) (*pb.TranscriptResult, error) {
|
||||
result, err := s.llm.AudioTranscription(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tresult := &pb.TranscriptResult{}
|
||||
for _, s := range result.Segments {
|
||||
tks := []int32{}
|
||||
for _, t := range s.Tokens {
|
||||
tks = append(tks, int32(t))
|
||||
}
|
||||
tresult.Segments = append(tresult.Segments,
|
||||
&pb.TranscriptSegment{
|
||||
Text: s.Text,
|
||||
Id: int32(s.Id),
|
||||
Start: int64(s.Start),
|
||||
End: int64(s.End),
|
||||
Tokens: tks,
|
||||
})
|
||||
}
|
||||
|
||||
tresult.Text = result.Text
|
||||
return tresult, nil
|
||||
}
|
||||
|
||||
func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictStreamServer) error {
|
||||
|
||||
resultChan := make(chan string)
|
||||
|
||||
|
@ -75,7 +116,7 @@ func StartServer(address string, model LLM) error {
|
|||
return err
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
pb.RegisterLLMServer(s, &server{llm: model})
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
if err := s.Serve(lis); err != nil {
|
||||
return err
|
||||
|
|
27
pkg/grpc/transcribe/whisper.go
Normal file
27
pkg/grpc/transcribe/whisper.go
Normal file
|
@ -0,0 +1,27 @@
|
|||
package transcribe
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
whisperutil "github.com/go-skynet/LocalAI/pkg/grpc/whisper"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
|
||||
)
|
||||
|
||||
type Whisper struct {
|
||||
base.Base
|
||||
whisper whisper.Model
|
||||
}
|
||||
|
||||
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
||||
// Note: the Model here is a path to a directory containing the model files
|
||||
w, err := whisper.New(opts.Model)
|
||||
sd.whisper = w
|
||||
return err
|
||||
}
|
||||
|
||||
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (api.Result, error) {
|
||||
return whisperutil.Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
|
||||
}
|
44
pkg/grpc/tts/piper.go
Normal file
44
pkg/grpc/tts/piper.go
Normal file
|
@ -0,0 +1,44 @@
|
|||
package tts
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
piper "github.com/mudler/go-piper"
|
||||
)
|
||||
|
||||
type Piper struct {
|
||||
base.Base
|
||||
piper *PiperB
|
||||
}
|
||||
|
||||
func (sd *Piper) Load(opts *pb.ModelOptions) error {
|
||||
var err error
|
||||
// Note: the Model here is a path to a directory containing the model files
|
||||
sd.piper, err = New(opts.LibrarySearchPath)
|
||||
return err
|
||||
}
|
||||
|
||||
func (sd *Piper) TTS(opts *pb.TTSRequest) error {
|
||||
return sd.piper.TTS(opts.Text, opts.Model, opts.Dst)
|
||||
}
|
||||
|
||||
type PiperB struct {
|
||||
assetDir string
|
||||
}
|
||||
|
||||
func New(assetDir string) (*PiperB, error) {
|
||||
if _, err := os.Stat(assetDir); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &PiperB{
|
||||
assetDir: assetDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *PiperB) TTS(text, model, dst string) error {
|
||||
return piper.TextToWav(text, model, s.assetDir, "", dst)
|
||||
}
|
16
pkg/grpc/whisper/api/api.go
Normal file
16
pkg/grpc/whisper/api/api.go
Normal file
|
@ -0,0 +1,16 @@
|
|||
package api
|
||||
|
||||
import "time"
|
||||
|
||||
type Segment struct {
|
||||
Id int `json:"id"`
|
||||
Start time.Duration `json:"start"`
|
||||
End time.Duration `json:"end"`
|
||||
Text string `json:"text"`
|
||||
Tokens []int `json:"tokens"`
|
||||
}
|
||||
|
||||
type Result struct {
|
||||
Segments []Segment `json:"segments"`
|
||||
Text string `json:"text"`
|
||||
}
|
|
@ -5,25 +5,12 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
wav "github.com/go-audio/wav"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
|
||||
)
|
||||
|
||||
type Segment struct {
|
||||
Id int `json:"id"`
|
||||
Start time.Duration `json:"start"`
|
||||
End time.Duration `json:"end"`
|
||||
Text string `json:"text"`
|
||||
Tokens []int `json:"tokens"`
|
||||
}
|
||||
|
||||
type Result struct {
|
||||
Segments []Segment `json:"segments"`
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
func sh(c string) (string, error) {
|
||||
cmd := exec.Command("/bin/sh", "-c", c)
|
||||
cmd.Env = os.Environ()
|
||||
|
@ -42,8 +29,8 @@ func audioToWav(src, dst string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func Transcript(model whisper.Model, audiopath, language string, threads uint) (Result, error) {
|
||||
res := Result{}
|
||||
func Transcript(model whisper.Model, audiopath, language string, threads uint) (api.Result, error) {
|
||||
res := api.Result{}
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
if err != nil {
|
||||
|
@ -99,11 +86,11 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
|
|||
}
|
||||
|
||||
var tokens []int
|
||||
for _, t := range(s.Tokens) {
|
||||
for _, t := range s.Tokens {
|
||||
tokens = append(tokens, t.Id)
|
||||
}
|
||||
|
||||
segment := Segment{Id: s.Num, Text: s.Text, Start:s.Start, End: s.End, Tokens: tokens}
|
||||
segment := api.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
|
||||
res.Segments = append(res.Segments, segment)
|
||||
|
||||
res.Text += s.Text
|
|
@ -4,18 +4,13 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
"github.com/go-skynet/LocalAI/pkg/langchain"
|
||||
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
||||
"github.com/go-skynet/LocalAI/pkg/tts"
|
||||
bloomz "github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hpcloud/tail"
|
||||
"github.com/phayes/freeport"
|
||||
|
@ -27,20 +22,22 @@ import (
|
|||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
const (
|
||||
LlamaBackend = "llama"
|
||||
BloomzBackend = "bloomz"
|
||||
StarcoderBackend = "starcoder"
|
||||
GPTJBackend = "gptj"
|
||||
DollyBackend = "dolly"
|
||||
MPTBackend = "mpt"
|
||||
GPTNeoXBackend = "gptneox"
|
||||
ReplitBackend = "replit"
|
||||
Gpt2Backend = "gpt2"
|
||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||
Gpt4AllMptBackend = "gpt4all-mpt"
|
||||
Gpt4AllJBackend = "gpt4all-j"
|
||||
Gpt4All = "gpt4all"
|
||||
FalconBackend = "falcon"
|
||||
LlamaBackend = "llama"
|
||||
BloomzBackend = "bloomz"
|
||||
StarcoderBackend = "starcoder"
|
||||
GPTJBackend = "gptj"
|
||||
DollyBackend = "dolly"
|
||||
MPTBackend = "mpt"
|
||||
GPTNeoXBackend = "gptneox"
|
||||
ReplitBackend = "replit"
|
||||
Gpt2Backend = "gpt2"
|
||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||
Gpt4AllMptBackend = "gpt4all-mpt"
|
||||
Gpt4AllJBackend = "gpt4all-j"
|
||||
Gpt4All = "gpt4all"
|
||||
FalconBackend = "falcon"
|
||||
FalconGGMLBackend = "falcon-ggml"
|
||||
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
RwkvBackend = "rwkv"
|
||||
WhisperBackend = "whisper"
|
||||
|
@ -54,77 +51,39 @@ var autoLoadBackends []string = []string{
|
|||
LlamaBackend,
|
||||
Gpt4All,
|
||||
RwkvBackend,
|
||||
FalconBackend,
|
||||
WhisperBackend,
|
||||
BertEmbeddingsBackend,
|
||||
GPTNeoXBackend,
|
||||
BertEmbeddingsBackend,
|
||||
FalconGGMLBackend,
|
||||
GPTJBackend,
|
||||
Gpt2Backend,
|
||||
DollyBackend,
|
||||
MPTBackend,
|
||||
ReplitBackend,
|
||||
StarcoderBackend,
|
||||
FalconBackend,
|
||||
BloomzBackend,
|
||||
}
|
||||
|
||||
var bertEmbeddings = func(modelFile string) (interface{}, error) {
|
||||
return bert.New(modelFile)
|
||||
}
|
||||
|
||||
var bloomzLM = func(modelFile string) (interface{}, error) {
|
||||
return bloomz.New(modelFile)
|
||||
}
|
||||
|
||||
var stableDiffusion = func(assetDir string) (interface{}, error) {
|
||||
return stablediffusion.New(assetDir)
|
||||
}
|
||||
|
||||
func piperTTS(assetDir string) func(s string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
return tts.New(assetDir)
|
||||
}
|
||||
}
|
||||
|
||||
var whisperModel = func(modelFile string) (interface{}, error) {
|
||||
return whisper.New(modelFile)
|
||||
}
|
||||
|
||||
var lcHuggingFace = func(repoId string) (interface{}, error) {
|
||||
return langchain.NewHuggingFace(repoId)
|
||||
}
|
||||
|
||||
// func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
|
||||
// return func(s string) (interface{}, error) {
|
||||
// return llama.New(s, opts...)
|
||||
// }
|
||||
// }
|
||||
|
||||
// func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
|
||||
// return func(s string) (interface{}, error) {
|
||||
// return gpt4all.New(s, opts...)
|
||||
// }
|
||||
// }
|
||||
|
||||
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
log.Debug().Msgf("Loading RWKV", s, tokenFile)
|
||||
|
||||
model := rwkv.LoadFiles(s, tokenFile, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
return model, nil
|
||||
func (ml *ModelLoader) StopGRPC() {
|
||||
for _, p := range ml.grpcProcesses {
|
||||
p.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||
// It also loads the model
|
||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
|
||||
return func(s string) (*grpc.Client, error) {
|
||||
log.Debug().Msgf("Loading GRPC Model", backend, *o)
|
||||
|
||||
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
|
||||
|
||||
// Check if the file exists
|
||||
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
|
||||
}
|
||||
|
||||
// Make sure the process is executable
|
||||
if err := os.Chmod(grpcProcess, 0755); err != nil {
|
||||
return nil, err
|
||||
|
@ -151,6 +110,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// clean up process
|
||||
go func() {
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
|
||||
<-c
|
||||
grpcControlProcess.Stop()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
|
||||
if err != nil {
|
||||
|
@ -200,7 +167,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
|
|||
|
||||
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
|
||||
|
||||
res, err := client.LoadModel(context.TODO(), &options)
|
||||
res, err := client.LoadModel(o.context, &options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -212,63 +179,37 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
|
|||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (model interface{}, err error) {
|
||||
|
||||
//backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) {
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
|
||||
switch strings.ToLower(o.backendString) {
|
||||
case LlamaBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(LlamaBackend, o))
|
||||
case BloomzBackend:
|
||||
return ml.LoadModel(o.modelFile, bloomzLM)
|
||||
case GPTJBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(GPTJBackend, o))
|
||||
case DollyBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(DollyBackend, o))
|
||||
case MPTBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(MPTBackend, o))
|
||||
case Gpt2Backend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt2Backend, o))
|
||||
case FalconBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(FalconBackend, o))
|
||||
case GPTNeoXBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(GPTNeoXBackend, o))
|
||||
case ReplitBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(ReplitBackend, o))
|
||||
case StableDiffusionBackend:
|
||||
return ml.LoadModel(o.modelFile, stableDiffusion)
|
||||
case PiperBackend:
|
||||
return ml.LoadModel(o.modelFile, piperTTS(filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")))
|
||||
case StarcoderBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(StarcoderBackend, o))
|
||||
|
||||
backend := strings.ToLower(o.backendString)
|
||||
switch backend {
|
||||
case LlamaBackend, GPTJBackend, DollyBackend,
|
||||
MPTBackend, Gpt2Backend, FalconBackend,
|
||||
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
|
||||
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
|
||||
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
|
||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o))
|
||||
// return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all"))))
|
||||
case BertEmbeddingsBackend:
|
||||
return ml.LoadModel(o.modelFile, bertEmbeddings)
|
||||
case RwkvBackend:
|
||||
return ml.LoadModel(o.modelFile, rwkvLM(filepath.Join(ml.ModelPath, o.modelFile+tokenizerSuffix), o.threads))
|
||||
case WhisperBackend:
|
||||
return ml.LoadModel(o.modelFile, whisperModel)
|
||||
case LCHuggingFaceBackend:
|
||||
return ml.LoadModel(o.modelFile, lcHuggingFace)
|
||||
case PiperBackend:
|
||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(PiperBackend, o))
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) {
|
||||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
log.Debug().Msgf("Loading model '%s' greedly", o.modelFile)
|
||||
|
||||
// Is this really needed? BackendLoader already does this
|
||||
ml.mu.Lock()
|
||||
m, exists := ml.models[o.modelFile]
|
||||
if exists {
|
||||
if m := ml.checkIsLoaded(o.modelFile); m != nil {
|
||||
log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
|
||||
ml.mu.Unlock()
|
||||
return m, nil
|
||||
|
@ -285,7 +226,7 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) {
|
|||
model, modelerr := ml.BackendLoader(
|
||||
WithBackendString(b),
|
||||
WithModelFile(o.modelFile),
|
||||
WithLoadGRPCOpts(o.gRPCOptions),
|
||||
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
|
||||
WithThreads(o.threads),
|
||||
WithAssetDir(o.assetDir),
|
||||
)
|
||||
|
|
|
@ -2,6 +2,7 @@ package model
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
@ -10,6 +11,7 @@ import (
|
|||
"sync"
|
||||
"text/template"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
process "github.com/mudler/go-processmanager"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
@ -18,7 +20,7 @@ type ModelLoader struct {
|
|||
ModelPath string
|
||||
mu sync.Mutex
|
||||
// TODO: this needs generics
|
||||
models map[string]interface{}
|
||||
models map[string]*grpc.Client
|
||||
grpcProcesses map[string]*process.Process
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
@ -26,7 +28,7 @@ type ModelLoader struct {
|
|||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
models: make(map[string]interface{}),
|
||||
models: make(map[string]*grpc.Client),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
grpcProcesses: make(map[string]*process.Process),
|
||||
}
|
||||
|
@ -113,14 +115,14 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
|
||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if m, ok := ml.models[modelName]; ok {
|
||||
if model := ml.checkIsLoaded(modelName); model != nil {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
return model, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
|
@ -140,3 +142,25 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interfac
|
|||
ml.models[modelName] = model
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) checkIsLoaded(s string) *grpc.Client {
|
||||
if m, ok := ml.models[s]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", s)
|
||||
|
||||
if !m.HealthCheck(context.Background()) {
|
||||
log.Debug().Msgf("GRPC Model not responding", s)
|
||||
if !ml.grpcProcesses[s].IsAlive() {
|
||||
log.Debug().Msgf("GRPC Process is not responding", s)
|
||||
// stop and delete the process, this forces to re-load the model and re-create again the service
|
||||
ml.grpcProcesses[s].Stop()
|
||||
delete(ml.grpcProcesses, s)
|
||||
delete(ml.models, s)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package model
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
|
@ -9,6 +11,7 @@ type Options struct {
|
|||
modelFile string
|
||||
threads uint32
|
||||
assetDir string
|
||||
context context.Context
|
||||
|
||||
gRPCOptions *pb.ModelOptions
|
||||
}
|
||||
|
@ -27,7 +30,7 @@ func WithModelFile(modelFile string) Option {
|
|||
}
|
||||
}
|
||||
|
||||
func WithLoadGRPCOpts(opts *pb.ModelOptions) Option {
|
||||
func WithLoadGRPCLLMModelOpts(opts *pb.ModelOptions) Option {
|
||||
return func(o *Options) {
|
||||
o.gRPCOptions = opts
|
||||
}
|
||||
|
@ -45,8 +48,17 @@ func WithAssetDir(assetDir string) Option {
|
|||
}
|
||||
}
|
||||
|
||||
func WithContext(ctx context.Context) Option {
|
||||
return func(o *Options) {
|
||||
o.context = ctx
|
||||
}
|
||||
}
|
||||
|
||||
func NewOptions(opts ...Option) *Options {
|
||||
o := &Options{}
|
||||
o := &Options{
|
||||
gRPCOptions: &pb.ModelOptions{},
|
||||
context: context.Background(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(o)
|
||||
}
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
//go:build tts
|
||||
// +build tts
|
||||
|
||||
package tts
|
||||
|
||||
import (
|
||||
piper "github.com/mudler/go-piper"
|
||||
)
|
||||
|
||||
func tts(text, model, assetDir, arLib, dst string) error {
|
||||
return piper.TextToWav(text, model, assetDir, arLib, dst)
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
//go:build !tts
|
||||
// +build !tts
|
||||
|
||||
package tts
|
||||
|
||||
import "fmt"
|
||||
|
||||
func tts(text, model, assetDir, arLib, dst string) error {
|
||||
return fmt.Errorf("this version of LocalAI was built without the tts tag")
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package tts
|
||||
|
||||
import "os"
|
||||
|
||||
type Piper struct {
|
||||
assetDir string
|
||||
}
|
||||
|
||||
func New(assetDir string) (*Piper, error) {
|
||||
if _, err := os.Stat(assetDir); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Piper{
|
||||
assetDir: assetDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *Piper) TTS(text, model, dst string) error {
|
||||
return tts(text, model, s.assetDir, "", dst)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue