feat: add falcon ggllm via grpc client

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-07-15 01:19:43 +02:00
parent a84dee1be1
commit b816009db0
20 changed files with 2239 additions and 271 deletions

98
pkg/grpc/client.go Normal file
View file

@ -0,0 +1,98 @@
package grpc
import (
"context"
"fmt"
"io"
"time"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
type Client struct {
address string
}
func NewClient(address string) *Client {
return &Client{
address: address,
}
}
func (c *Client) HealthCheck(ctx context.Context) bool {
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
fmt.Println(err)
return false
}
defer conn.Close()
client := pb.NewLLMClient(conn)
// The healthcheck call shouldn't take long time
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
res, err := client.Health(ctx, &pb.HealthMessage{})
if err != nil {
fmt.Println(err)
return false
}
if res.Message == "OK" {
return true
}
return false
}
func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) {
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewLLMClient(conn)
return client.Predict(ctx, in, opts...)
}
func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) {
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewLLMClient(conn)
return client.LoadModel(ctx, in, opts...)
}
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s string), opts ...grpc.CallOption) error {
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return err
}
defer conn.Close()
client := pb.NewLLMClient(conn)
stream, err := client.PredictStream(ctx, in, opts...)
if err != nil {
return err
}
for {
feature, err := stream.Recv()
if err == io.EOF {
break
}
if err != nil {
fmt.Println("Error", err)
return err
}
f(feature.GetMessage())
}
return nil
}

11
pkg/grpc/interface.go Normal file
View file

@ -0,0 +1,11 @@
package grpc
import (
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type LLM interface {
Predict(*pb.PredictOptions) (string, error)
PredictStream(*pb.PredictOptions, chan string)
Load(*pb.ModelOptions) error
}

View file

@ -0,0 +1,136 @@
package falcon
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
ggllm "github.com/mudler/go-ggllm.cpp"
)
type LLM struct {
falcon *ggllm.Falcon
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ggllmOpts := []ggllm.ModelOption{}
if opts.ContextSize != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))
}
// F16 doesn't seem to produce good output at all!
//if c.F16 {
// llamaOpts = append(llamaOpts, llama.EnableF16Memory)
//}
if opts.NGPULayers != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers)))
}
ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap))
ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU))
ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch)))
} else {
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
}
model, err := ggllm.New(opts.Model, ggllmOpts...)
llm.falcon = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
predictOptions := []ggllm.PredictOption{
ggllm.SetTemperature(float64(opts.Temperature)),
ggllm.SetTopP(float64(opts.TopP)),
ggllm.SetTopK(int(opts.TopK)),
ggllm.SetTokens(int(opts.Tokens)),
ggllm.SetThreads(int(opts.Threads)),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA)))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU)))
}
if opts.Debug {
predictOptions = append(predictOptions, ggllm.Debug)
}
predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty)))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch)))
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, ggllm.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty)))
predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock))
predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ)))
predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP)))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.falcon.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
}

View file

View file

@ -0,0 +1,870 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.26.0
// protoc v3.15.8
// source: pkg/grpc/proto/llmserver.proto
package proto
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type HealthMessage struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
}
func (x *HealthMessage) Reset() {
*x = HealthMessage{}
if protoimpl.UnsafeEnabled {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *HealthMessage) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*HealthMessage) ProtoMessage() {}
func (x *HealthMessage) ProtoReflect() protoreflect.Message {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead.
func (*HealthMessage) Descriptor() ([]byte, []int) {
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{0}
}
// The request message containing the user's name.
type PredictOptions struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"`
Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"`
Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"`
Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"`
TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"`
Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"`
Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"`
NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"`
Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"`
Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"`
F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"`
DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"`
StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"`
IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"`
TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"`
TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"`
FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"`
PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"`
Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"`
MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"`
MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"`
PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"`
LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"`
PathPromptCache string `protobuf:"bytes,24,opt,name=PathPromptCache,proto3" json:"PathPromptCache,omitempty"`
MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"`
MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"`
PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"`
PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"`
Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"`
MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"`
PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"`
Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"`
}
func (x *PredictOptions) Reset() {
*x = PredictOptions{}
if protoimpl.UnsafeEnabled {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *PredictOptions) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*PredictOptions) ProtoMessage() {}
func (x *PredictOptions) ProtoReflect() protoreflect.Message {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead.
func (*PredictOptions) Descriptor() ([]byte, []int) {
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{1}
}
func (x *PredictOptions) GetPrompt() string {
if x != nil {
return x.Prompt
}
return ""
}
func (x *PredictOptions) GetSeed() int32 {
if x != nil {
return x.Seed
}
return 0
}
func (x *PredictOptions) GetThreads() int32 {
if x != nil {
return x.Threads
}
return 0
}
func (x *PredictOptions) GetTokens() int32 {
if x != nil {
return x.Tokens
}
return 0
}
func (x *PredictOptions) GetTopK() int32 {
if x != nil {
return x.TopK
}
return 0
}
func (x *PredictOptions) GetRepeat() int32 {
if x != nil {
return x.Repeat
}
return 0
}
func (x *PredictOptions) GetBatch() int32 {
if x != nil {
return x.Batch
}
return 0
}
func (x *PredictOptions) GetNKeep() int32 {
if x != nil {
return x.NKeep
}
return 0
}
func (x *PredictOptions) GetTemperature() float32 {
if x != nil {
return x.Temperature
}
return 0
}
func (x *PredictOptions) GetPenalty() float32 {
if x != nil {
return x.Penalty
}
return 0
}
func (x *PredictOptions) GetF16KV() bool {
if x != nil {
return x.F16KV
}
return false
}
func (x *PredictOptions) GetDebugMode() bool {
if x != nil {
return x.DebugMode
}
return false
}
func (x *PredictOptions) GetStopPrompts() []string {
if x != nil {
return x.StopPrompts
}
return nil
}
func (x *PredictOptions) GetIgnoreEOS() bool {
if x != nil {
return x.IgnoreEOS
}
return false
}
func (x *PredictOptions) GetTailFreeSamplingZ() float32 {
if x != nil {
return x.TailFreeSamplingZ
}
return 0
}
func (x *PredictOptions) GetTypicalP() float32 {
if x != nil {
return x.TypicalP
}
return 0
}
func (x *PredictOptions) GetFrequencyPenalty() float32 {
if x != nil {
return x.FrequencyPenalty
}
return 0
}
func (x *PredictOptions) GetPresencePenalty() float32 {
if x != nil {
return x.PresencePenalty
}
return 0
}
func (x *PredictOptions) GetMirostat() int32 {
if x != nil {
return x.Mirostat
}
return 0
}
func (x *PredictOptions) GetMirostatETA() float32 {
if x != nil {
return x.MirostatETA
}
return 0
}
func (x *PredictOptions) GetMirostatTAU() float32 {
if x != nil {
return x.MirostatTAU
}
return 0
}
func (x *PredictOptions) GetPenalizeNL() bool {
if x != nil {
return x.PenalizeNL
}
return false
}
func (x *PredictOptions) GetLogitBias() string {
if x != nil {
return x.LogitBias
}
return ""
}
func (x *PredictOptions) GetPathPromptCache() string {
if x != nil {
return x.PathPromptCache
}
return ""
}
func (x *PredictOptions) GetMLock() bool {
if x != nil {
return x.MLock
}
return false
}
func (x *PredictOptions) GetMMap() bool {
if x != nil {
return x.MMap
}
return false
}
func (x *PredictOptions) GetPromptCacheAll() bool {
if x != nil {
return x.PromptCacheAll
}
return false
}
func (x *PredictOptions) GetPromptCacheRO() bool {
if x != nil {
return x.PromptCacheRO
}
return false
}
func (x *PredictOptions) GetGrammar() string {
if x != nil {
return x.Grammar
}
return ""
}
func (x *PredictOptions) GetMainGPU() string {
if x != nil {
return x.MainGPU
}
return ""
}
func (x *PredictOptions) GetTensorSplit() string {
if x != nil {
return x.TensorSplit
}
return ""
}
func (x *PredictOptions) GetTopP() float32 {
if x != nil {
return x.TopP
}
return 0
}
func (x *PredictOptions) GetPromptCachePath() string {
if x != nil {
return x.PromptCachePath
}
return ""
}
func (x *PredictOptions) GetDebug() bool {
if x != nil {
return x.Debug
}
return false
}
// The response message containing the result
type Reply struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
}
func (x *Reply) Reset() {
*x = Reply{}
if protoimpl.UnsafeEnabled {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *Reply) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Reply) ProtoMessage() {}
func (x *Reply) ProtoReflect() protoreflect.Message {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Reply.ProtoReflect.Descriptor instead.
func (*Reply) Descriptor() ([]byte, []int) {
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{2}
}
func (x *Reply) GetMessage() string {
if x != nil {
return x.Message
}
return ""
}
type ModelOptions struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"`
ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"`
Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"`
NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"`
F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"`
MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"`
MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"`
VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"`
LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"`
Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"`
NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"`
NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"`
MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"`
TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"`
}
func (x *ModelOptions) Reset() {
*x = ModelOptions{}
if protoimpl.UnsafeEnabled {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ModelOptions) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ModelOptions) ProtoMessage() {}
func (x *ModelOptions) ProtoReflect() protoreflect.Message {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead.
func (*ModelOptions) Descriptor() ([]byte, []int) {
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{3}
}
func (x *ModelOptions) GetModel() string {
if x != nil {
return x.Model
}
return ""
}
func (x *ModelOptions) GetContextSize() int32 {
if x != nil {
return x.ContextSize
}
return 0
}
func (x *ModelOptions) GetSeed() int32 {
if x != nil {
return x.Seed
}
return 0
}
func (x *ModelOptions) GetNBatch() int32 {
if x != nil {
return x.NBatch
}
return 0
}
func (x *ModelOptions) GetF16Memory() bool {
if x != nil {
return x.F16Memory
}
return false
}
func (x *ModelOptions) GetMLock() bool {
if x != nil {
return x.MLock
}
return false
}
func (x *ModelOptions) GetMMap() bool {
if x != nil {
return x.MMap
}
return false
}
func (x *ModelOptions) GetVocabOnly() bool {
if x != nil {
return x.VocabOnly
}
return false
}
func (x *ModelOptions) GetLowVRAM() bool {
if x != nil {
return x.LowVRAM
}
return false
}
func (x *ModelOptions) GetEmbeddings() bool {
if x != nil {
return x.Embeddings
}
return false
}
func (x *ModelOptions) GetNUMA() bool {
if x != nil {
return x.NUMA
}
return false
}
func (x *ModelOptions) GetNGPULayers() int32 {
if x != nil {
return x.NGPULayers
}
return 0
}
func (x *ModelOptions) GetMainGPU() string {
if x != nil {
return x.MainGPU
}
return ""
}
func (x *ModelOptions) GetTensorSplit() string {
if x != nil {
return x.TensorSplit
}
return ""
}
type Result struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"`
}
func (x *Result) Reset() {
*x = Result{}
if protoimpl.UnsafeEnabled {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *Result) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Result) ProtoMessage() {}
func (x *Result) ProtoReflect() protoreflect.Message {
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Result.ProtoReflect.Descriptor instead.
func (*Result) Descriptor() ([]byte, []int) {
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{4}
}
func (x *Result) GetMessage() string {
if x != nil {
return x.Message
}
return ""
}
func (x *Result) GetSuccess() bool {
if x != nil {
return x.Success
}
return false
}
var File_pkg_grpc_proto_llmserver_proto protoreflect.FileDescriptor
var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{
0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x2f, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x12, 0x03, 0x6c, 0x6c, 0x6d, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d,
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x80, 0x08, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69,
0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f,
0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70,
0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52,
0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73,
0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12,
0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52,
0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18,
0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52,
0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70,
0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01,
0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65,
0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12,
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09,
0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72,
0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01,
0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46,
0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b,
0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c,
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12,
0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d,
0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e,
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12,
0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c,
0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c,
0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a,
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52,
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65,
0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20,
0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65,
0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63,
0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f,
0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12,
0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28,
0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d,
0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02,
0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a,
0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01,
0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12,
0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20,
0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12,
0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01,
0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x28, 0x0a,
0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65,
0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d,
0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b,
0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a,
0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61,
0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65,
0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70,
0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f,
0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08,
0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12,
0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09,
0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69,
0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e,
0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c,
0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72,
0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20,
0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f,
0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01,
0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50,
0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01,
0x28, 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70,
0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20,
0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x82, 0x03, 0x0a,
0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a,
0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f,
0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69,
0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78,
0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20,
0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61,
0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63,
0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05,
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12,
0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05,
0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20,
0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63,
0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f,
0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52,
0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41,
0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18,
0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67,
0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52,
0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79,
0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c,
0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55,
0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12,
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e,
0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69,
0x74, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d,
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65,
0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73,
0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x32,
0xc4, 0x01, 0x0a, 0x03, 0x4c, 0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74,
0x68, 0x12, 0x12, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65,
0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c,
0x79, 0x22, 0x00, 0x12, 0x2c, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x13,
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69,
0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22,
0x00, 0x12, 0x2d, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x11,
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e,
0x73, 0x1a, 0x0b, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00,
0x12, 0x34, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61,
0x6d, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f,
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70,
0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x42, 0x57, 0x0a, 0x1b, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79,
0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x6c, 0x6c, 0x6d, 0x73,
0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72,
0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67,
0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49,
0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (
file_pkg_grpc_proto_llmserver_proto_rawDescOnce sync.Once
file_pkg_grpc_proto_llmserver_proto_rawDescData = file_pkg_grpc_proto_llmserver_proto_rawDesc
)
func file_pkg_grpc_proto_llmserver_proto_rawDescGZIP() []byte {
file_pkg_grpc_proto_llmserver_proto_rawDescOnce.Do(func() {
file_pkg_grpc_proto_llmserver_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_grpc_proto_llmserver_proto_rawDescData)
})
return file_pkg_grpc_proto_llmserver_proto_rawDescData
}
var file_pkg_grpc_proto_llmserver_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
var file_pkg_grpc_proto_llmserver_proto_goTypes = []interface{}{
(*HealthMessage)(nil), // 0: llm.HealthMessage
(*PredictOptions)(nil), // 1: llm.PredictOptions
(*Reply)(nil), // 2: llm.Reply
(*ModelOptions)(nil), // 3: llm.ModelOptions
(*Result)(nil), // 4: llm.Result
}
var file_pkg_grpc_proto_llmserver_proto_depIdxs = []int32{
0, // 0: llm.LLM.Health:input_type -> llm.HealthMessage
1, // 1: llm.LLM.Predict:input_type -> llm.PredictOptions
3, // 2: llm.LLM.LoadModel:input_type -> llm.ModelOptions
1, // 3: llm.LLM.PredictStream:input_type -> llm.PredictOptions
2, // 4: llm.LLM.Health:output_type -> llm.Reply
2, // 5: llm.LLM.Predict:output_type -> llm.Reply
4, // 6: llm.LLM.LoadModel:output_type -> llm.Result
2, // 7: llm.LLM.PredictStream:output_type -> llm.Reply
4, // [4:8] is the sub-list for method output_type
0, // [0:4] is the sub-list for method input_type
0, // [0:0] is the sub-list for extension type_name
0, // [0:0] is the sub-list for extension extendee
0, // [0:0] is the sub-list for field type_name
}
func init() { file_pkg_grpc_proto_llmserver_proto_init() }
func file_pkg_grpc_proto_llmserver_proto_init() {
if File_pkg_grpc_proto_llmserver_proto != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_pkg_grpc_proto_llmserver_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*HealthMessage); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_pkg_grpc_proto_llmserver_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*PredictOptions); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_pkg_grpc_proto_llmserver_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*Reply); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_pkg_grpc_proto_llmserver_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ModelOptions); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_pkg_grpc_proto_llmserver_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*Result); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_pkg_grpc_proto_llmserver_proto_rawDesc,
NumEnums: 0,
NumMessages: 5,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_pkg_grpc_proto_llmserver_proto_goTypes,
DependencyIndexes: file_pkg_grpc_proto_llmserver_proto_depIdxs,
MessageInfos: file_pkg_grpc_proto_llmserver_proto_msgTypes,
}.Build()
File_pkg_grpc_proto_llmserver_proto = out.File
file_pkg_grpc_proto_llmserver_proto_rawDesc = nil
file_pkg_grpc_proto_llmserver_proto_goTypes = nil
file_pkg_grpc_proto_llmserver_proto_depIdxs = nil
}

View file

@ -0,0 +1,82 @@
syntax = "proto3";
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_package = "io.skynet.localai.llmserver";
option java_outer_classname = "LLMServer";
package llm;
service LLM {
rpc Health(HealthMessage) returns (Reply) {}
rpc Predict(PredictOptions) returns (Reply) {}
rpc LoadModel(ModelOptions) returns (Result) {}
rpc PredictStream(PredictOptions) returns (stream Reply) {}
}
message HealthMessage {}
// The request message containing the user's name.
message PredictOptions {
string Prompt = 1;
int32 Seed = 2;
int32 Threads = 3;
int32 Tokens = 4;
int32 TopK = 5;
int32 Repeat = 6;
int32 Batch = 7;
int32 NKeep = 8;
float Temperature = 9;
float Penalty = 10;
bool F16KV = 11;
bool DebugMode = 12;
repeated string StopPrompts = 13;
bool IgnoreEOS = 14;
float TailFreeSamplingZ = 15;
float TypicalP = 16;
float FrequencyPenalty = 17;
float PresencePenalty = 18;
int32 Mirostat = 19;
float MirostatETA = 20;
float MirostatTAU = 21;
bool PenalizeNL = 22;
string LogitBias = 23;
string PathPromptCache = 24;
bool MLock = 25;
bool MMap = 26;
bool PromptCacheAll = 27;
bool PromptCacheRO = 28;
string Grammar = 29;
string MainGPU = 30;
string TensorSplit = 31;
float TopP = 32;
string PromptCachePath = 33;
bool Debug = 34;
}
// The response message containing the result
message Reply {
string message = 1;
}
message ModelOptions {
string Model = 1;
int32 ContextSize = 2;
int32 Seed = 3;
int32 NBatch = 4;
bool F16Memory = 5;
bool MLock = 6;
bool MMap = 7;
bool VocabOnly = 8;
bool LowVRAM = 9;
bool Embeddings = 10;
bool NUMA = 11;
int32 NGPULayers = 12;
string MainGPU = 13;
string TensorSplit = 14;
}
message Result {
string message = 1;
bool success = 2;
}

View file

@ -0,0 +1,241 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v3.15.8
// source: pkg/grpc/proto/llmserver.proto
package proto
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
// LLMClient is the client API for LLM service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type LLMClient interface {
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error)
}
type lLMClient struct {
cc grpc.ClientConnInterface
}
func NewLLMClient(cc grpc.ClientConnInterface) LLMClient {
return &lLMClient{cc}
}
func (c *lLMClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/llm.LLM/Health", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *lLMClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/llm.LLM/Predict", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *lLMClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/llm.LLM/LoadModel", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *lLMClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) {
stream, err := c.cc.NewStream(ctx, &LLM_ServiceDesc.Streams[0], "/llm.LLM/PredictStream", opts...)
if err != nil {
return nil, err
}
x := &lLMPredictStreamClient{stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
type LLM_PredictStreamClient interface {
Recv() (*Reply, error)
grpc.ClientStream
}
type lLMPredictStreamClient struct {
grpc.ClientStream
}
func (x *lLMPredictStreamClient) Recv() (*Reply, error) {
m := new(Reply)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
// LLMServer is the server API for LLM service.
// All implementations must embed UnimplementedLLMServer
// for forward compatibility
type LLMServer interface {
Health(context.Context, *HealthMessage) (*Reply, error)
Predict(context.Context, *PredictOptions) (*Reply, error)
LoadModel(context.Context, *ModelOptions) (*Result, error)
PredictStream(*PredictOptions, LLM_PredictStreamServer) error
mustEmbedUnimplementedLLMServer()
}
// UnimplementedLLMServer must be embedded to have forward compatible implementations.
type UnimplementedLLMServer struct {
}
func (UnimplementedLLMServer) Health(context.Context, *HealthMessage) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
}
func (UnimplementedLLMServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
}
func (UnimplementedLLMServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
}
func (UnimplementedLLMServer) PredictStream(*PredictOptions, LLM_PredictStreamServer) error {
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
}
func (UnimplementedLLMServer) mustEmbedUnimplementedLLMServer() {}
// UnsafeLLMServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to LLMServer will
// result in compilation errors.
type UnsafeLLMServer interface {
mustEmbedUnimplementedLLMServer()
}
func RegisterLLMServer(s grpc.ServiceRegistrar, srv LLMServer) {
s.RegisterService(&LLM_ServiceDesc, srv)
}
func _LLM_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HealthMessage)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(LLMServer).Health(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/llm.LLM/Health",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(LLMServer).Health(ctx, req.(*HealthMessage))
}
return interceptor(ctx, in, info, handler)
}
func _LLM_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(LLMServer).Predict(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/llm.LLM/Predict",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(LLMServer).Predict(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _LLM_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(ModelOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(LLMServer).LoadModel(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/llm.LLM/LoadModel",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(LLMServer).LoadModel(ctx, req.(*ModelOptions))
}
return interceptor(ctx, in, info, handler)
}
func _LLM_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(PredictOptions)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(LLMServer).PredictStream(m, &lLMPredictStreamServer{stream})
}
type LLM_PredictStreamServer interface {
Send(*Reply) error
grpc.ServerStream
}
type lLMPredictStreamServer struct {
grpc.ServerStream
}
func (x *lLMPredictStreamServer) Send(m *Reply) error {
return x.ServerStream.SendMsg(m)
}
// LLM_ServiceDesc is the grpc.ServiceDesc for LLM service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var LLM_ServiceDesc = grpc.ServiceDesc{
ServiceName: "llm.LLM",
HandlerType: (*LLMServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Health",
Handler: _LLM_Health_Handler,
},
{
MethodName: "Predict",
Handler: _LLM_Predict_Handler,
},
{
MethodName: "LoadModel",
Handler: _LLM_LoadModel_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "PredictStream",
Handler: _LLM_PredictStream_Handler,
ServerStreams: true,
},
},
Metadata: "pkg/grpc/proto/llmserver.proto",
}

76
pkg/grpc/server.go Normal file
View file

@ -0,0 +1,76 @@
package grpc
import (
"context"
"fmt"
"log"
"net"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"google.golang.org/grpc"
)
// A GRPC Server that allows to run LLM inference.
// It is used by the LLMServices to expose the LLM functionalities that are called by the client.
// The GRPC Service is general, trying to encompass all the possible LLM options models.
// It depends on the real implementer then what can be done or not.
//
// The server is implemented as a GRPC service, with the following methods:
// - Predict: to run the inference with options
// - PredictStream: to run the inference with options and stream the results
// server is used to implement helloworld.GreeterServer.
type server struct {
pb.UnimplementedLLMServer
llm LLM
}
func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) {
return &pb.Reply{Message: "OK"}, nil
}
func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result, error) {
err := s.llm.Load(in)
if err != nil {
return &pb.Result{Message: fmt.Sprintf("Error loading model: %s", err.Error()), Success: false}, err
}
return &pb.Result{Message: "Loading succeeded", Success: true}, nil
}
func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply, error) {
result, err := s.llm.Predict(in)
return &pb.Reply{Message: result}, err
}
func (s *server) PredictStream(in *pb.PredictOptions, stream pb.LLM_PredictStreamServer) error {
resultChan := make(chan string)
done := make(chan bool)
go func() {
for result := range resultChan {
stream.Send(&pb.Reply{Message: result})
}
done <- true
}()
s.llm.PredictStream(in, resultChan)
<-done
return nil
}
func StartServer(address string, model LLM) error {
lis, err := net.Listen("tcp", address)
if err != nil {
return err
}
s := grpc.NewServer()
pb.RegisterLLMServer(s, &server{llm: model})
log.Printf("gRPC Server listening at %v", lis.Addr())
if err := s.Serve(lis); err != nil {
return err
}
return nil
}

View file

@ -1,12 +1,16 @@
package model
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"time"
rwkv "github.com/donomii/go-rwkv.cpp"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/go-skynet/LocalAI/pkg/langchain"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/LocalAI/pkg/tts"
@ -15,8 +19,12 @@ import (
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
"github.com/hpcloud/tail"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
process "github.com/mudler/go-processmanager"
)
const tokenizerSuffix = ".tokenizer.json"
@ -42,22 +50,24 @@ const (
StableDiffusionBackend = "stablediffusion"
PiperBackend = "piper"
LCHuggingFaceBackend = "langchain-huggingface"
//GGLLMFalconBackend = "falcon"
)
var autoLoadBackends []string = []string{
LlamaBackend,
Gpt4All,
RwkvBackend,
GPTNeoXBackend,
//GGLLMFalconBackend,
WhisperBackend,
BertEmbeddingsBackend,
GPTNeoXBackend,
GPTJBackend,
Gpt2Backend,
DollyBackend,
FalconBackend,
MPTBackend,
ReplitBackend,
StarcoderBackend,
FalconBackend,
BloomzBackend,
}
@ -73,6 +83,12 @@ var dolly = func(modelFile string) (interface{}, error) {
return transformers.NewDolly(modelFile)
}
// func ggllmFalcon(opts ...ggllm.ModelOption) func(string) (interface{}, error) {
// return func(s string) (interface{}, error) {
// return ggllm.New(s, opts...)
// }
// }
var gptNeoX = func(modelFile string) (interface{}, error) {
return transformers.NewGPTNeoX(modelFile)
}
@ -143,55 +159,157 @@ func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error)
}
}
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) {
log.Debug().Msgf("Loading model %s from %s", backendString, modelFile)
switch strings.ToLower(backendString) {
case LlamaBackend:
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
case BloomzBackend:
return ml.LoadModel(modelFile, bloomzLM)
case GPTJBackend:
return ml.LoadModel(modelFile, gptJ)
case DollyBackend:
return ml.LoadModel(modelFile, dolly)
case MPTBackend:
return ml.LoadModel(modelFile, mpt)
case Gpt2Backend:
return ml.LoadModel(modelFile, transformersLM)
case FalconBackend:
return ml.LoadModel(modelFile, falcon)
case GPTNeoXBackend:
return ml.LoadModel(modelFile, gptNeoX)
case ReplitBackend:
return ml.LoadModel(modelFile, replit)
case StableDiffusionBackend:
return ml.LoadModel(modelFile, stableDiffusion)
case PiperBackend:
return ml.LoadModel(modelFile, piperTTS(filepath.Join(assetDir, "backend-assets", "espeak-ng-data")))
case StarcoderBackend:
return ml.LoadModel(modelFile, starCoder)
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetLibrarySearchPath(filepath.Join(assetDir, "backend-assets", "gpt4all"))))
case BertEmbeddingsBackend:
return ml.LoadModel(modelFile, bertEmbeddings)
case RwkvBackend:
return ml.LoadModel(modelFile, rwkvLM(filepath.Join(ml.ModelPath, modelFile+tokenizerSuffix), threads))
case WhisperBackend:
return ml.LoadModel(modelFile, whisperModel)
case LCHuggingFaceBackend:
return ml.LoadModel(modelFile, lcHuggingFace)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
log.Debug().Msgf("Loading GRPC Model", backend, *o)
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
// Make sure the process is executable
if err := os.Chmod(grpcProcess, 0755); err != nil {
return nil, err
}
log.Debug().Msgf("Loading GRPC Process", grpcProcess)
port, err := freeport.GetFreePort()
if err != nil {
return nil, err
}
serverAddress := fmt.Sprintf("localhost:%d", port)
log.Debug().Msgf("GRPC Service for '%s' (%s) will be running at: '%s'", backend, o.modelFile, serverAddress)
grpcControlProcess := process.New(
process.WithTemporaryStateDir(),
process.WithName(grpcProcess),
process.WithArgs("--addr", serverAddress))
ml.grpcProcesses[o.modelFile] = grpcControlProcess
if err := grpcControlProcess.Run(); err != nil {
return nil, err
}
go func() {
t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
if err != nil {
log.Debug().Msgf("Could not tail stderr")
}
for line := range t.Lines {
log.Debug().Msgf("GRPC(%s): stderr %s", strings.Join([]string{backend, o.modelFile, serverAddress}, "-"), line.Text)
}
}()
go func() {
t, err := tail.TailFile(grpcControlProcess.StdoutPath(), tail.Config{Follow: true})
if err != nil {
log.Debug().Msgf("Could not tail stdout")
}
for line := range t.Lines {
log.Debug().Msgf("GRPC(%s): stderr %s", strings.Join([]string{backend, o.modelFile, serverAddress}, "-"), line.Text)
}
}()
log.Debug().Msgf("GRPC Service Started")
client := grpc.NewClient(serverAddress)
// Wait for the service to start up
ready := false
for i := 0; i < 10; i++ {
if client.HealthCheck(context.Background()) {
log.Debug().Msgf("GRPC Service Ready")
ready = true
break
}
time.Sleep(1 * time.Second)
}
if !ready {
log.Debug().Msgf("GRPC Service NOT ready")
log.Debug().Msgf("Alive: ", grpcControlProcess.IsAlive())
log.Debug().Msgf(fmt.Sprintf("GRPC Service Exitcode:"))
log.Debug().Msgf(grpcControlProcess.ExitCode())
return nil, fmt.Errorf("grpc service not ready")
}
options := *o.gRPCOptions
options.Model = s
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
res, err := client.LoadModel(context.TODO(), &options)
if err != nil {
return nil, err
}
if !res.Success {
return nil, fmt.Errorf("could not load model: %s", res.Message)
}
return client, nil
}
}
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (interface{}, error) {
log.Debug().Msgf("Loading model '%s' greedly", modelFile)
func (ml *ModelLoader) BackendLoader(opts ...Option) (model interface{}, err error) {
//backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) {
o := NewOptions(opts...)
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
switch strings.ToLower(o.backendString) {
case LlamaBackend:
return ml.LoadModel(o.modelFile, llamaLM(o.llamaOpts...))
case BloomzBackend:
return ml.LoadModel(o.modelFile, bloomzLM)
case GPTJBackend:
return ml.LoadModel(o.modelFile, gptJ)
case DollyBackend:
return ml.LoadModel(o.modelFile, dolly)
case MPTBackend:
return ml.LoadModel(o.modelFile, mpt)
case Gpt2Backend:
return ml.LoadModel(o.modelFile, transformersLM)
case FalconBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(FalconBackend, o))
case GPTNeoXBackend:
return ml.LoadModel(o.modelFile, gptNeoX)
case ReplitBackend:
return ml.LoadModel(o.modelFile, replit)
case StableDiffusionBackend:
return ml.LoadModel(o.modelFile, stableDiffusion)
case PiperBackend:
return ml.LoadModel(o.modelFile, piperTTS(filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")))
case StarcoderBackend:
return ml.LoadModel(o.modelFile, starCoder)
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all"))))
case BertEmbeddingsBackend:
return ml.LoadModel(o.modelFile, bertEmbeddings)
case RwkvBackend:
return ml.LoadModel(o.modelFile, rwkvLM(filepath.Join(ml.ModelPath, o.modelFile+tokenizerSuffix), o.threads))
case WhisperBackend:
return ml.LoadModel(o.modelFile, whisperModel)
case LCHuggingFaceBackend:
return ml.LoadModel(o.modelFile, lcHuggingFace)
default:
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
}
}
func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) {
o := NewOptions(opts...)
log.Debug().Msgf("Loading model '%s' greedly", o.modelFile)
ml.mu.Lock()
m, exists := ml.models[modelFile]
m, exists := ml.models[o.modelFile]
if exists {
log.Debug().Msgf("Model '%s' already loaded", modelFile)
log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
ml.mu.Unlock()
return m, nil
}
@ -203,7 +321,15 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
continue
}
log.Debug().Msgf("[%s] Attempting to load", b)
model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads, assetDir)
model, modelerr := ml.BackendLoader(
WithBackendString(b),
WithModelFile(o.modelFile),
WithLlamaOpts(o.llamaOpts...),
WithLoadGRPCOpts(o.gRPCOptions),
WithThreads(o.threads),
WithAssetDir(o.assetDir),
)
if modelerr == nil && model != nil {
log.Debug().Msgf("[%s] Loads OK", b)
return model, nil

View file

@ -10,6 +10,7 @@ import (
"sync"
"text/template"
process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
)
@ -18,6 +19,7 @@ type ModelLoader struct {
mu sync.Mutex
// TODO: this needs generics
models map[string]interface{}
grpcProcesses map[string]*process.Process
promptsTemplates map[string]*template.Template
}
@ -26,6 +28,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
ModelPath: modelPath,
models: make(map[string]interface{}),
promptsTemplates: make(map[string]*template.Template),
grpcProcesses: make(map[string]*process.Process),
}
}

62
pkg/model/options.go Normal file
View file

@ -0,0 +1,62 @@
package model
import (
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
llama "github.com/go-skynet/go-llama.cpp"
)
type Options struct {
backendString string
modelFile string
llamaOpts []llama.ModelOption
threads uint32
assetDir string
gRPCOptions *pb.ModelOptions
}
type Option func(*Options)
func WithBackendString(backend string) Option {
return func(o *Options) {
o.backendString = backend
}
}
func WithModelFile(modelFile string) Option {
return func(o *Options) {
o.modelFile = modelFile
}
}
func WithLoadGRPCOpts(opts *pb.ModelOptions) Option {
return func(o *Options) {
o.gRPCOptions = opts
}
}
func WithLlamaOpts(opts ...llama.ModelOption) Option {
return func(o *Options) {
o.llamaOpts = append(o.llamaOpts, opts...)
}
}
func WithThreads(threads uint32) Option {
return func(o *Options) {
o.threads = threads
}
}
func WithAssetDir(assetDir string) Option {
return func(o *Options) {
o.assetDir = assetDir
}
}
func NewOptions(opts ...Option) *Options {
o := &Options{}
for _, opt := range opts {
opt(o)
}
return o
}