mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-27 22:15:00 +00:00
feat(grpc): backend SPI pluggable in embedding mode (#1621)
* run server * grpc backend embedded support * backend providable
This commit is contained in:
parent
efe2883c5d
commit
d5d82ba344
8 changed files with 196 additions and 20 deletions
46
pkg/grpc/backend.go
Normal file
46
pkg/grpc/backend.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package grpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
var embeds = map[string]*embedBackend{}
|
||||
|
||||
func Provide(addr string, llm LLM) {
|
||||
embeds[addr] = &embedBackend{s: &server{llm: llm}}
|
||||
}
|
||||
|
||||
func NewClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend {
|
||||
if bc, ok := embeds[address]; ok {
|
||||
return bc
|
||||
}
|
||||
return NewGrpcClient(address, parallel, wd, enableWatchDog)
|
||||
}
|
||||
|
||||
func NewGrpcClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend {
|
||||
if !enableWatchDog {
|
||||
wd = nil
|
||||
}
|
||||
return &Client{
|
||||
address: address,
|
||||
parallel: parallel,
|
||||
wd: wd,
|
||||
}
|
||||
}
|
||||
|
||||
type Backend interface {
|
||||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
|
||||
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
|
||||
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
|
||||
Status(ctx context.Context) (*pb.StatusResponse, error)
|
||||
}
|
|
@ -27,17 +27,6 @@ type WatchDog interface {
|
|||
UnMark(address string)
|
||||
}
|
||||
|
||||
func NewClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) *Client {
|
||||
if !enableWatchDog {
|
||||
wd = nil
|
||||
}
|
||||
return &Client{
|
||||
address: address,
|
||||
parallel: parallel,
|
||||
wd: wd,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Client) IsBusy() bool {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
|
|
121
pkg/grpc/embed.go
Normal file
121
pkg/grpc/embed.go
Normal file
|
@ -0,0 +1,121 @@
|
|||
package grpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/metadata"
|
||||
"time"
|
||||
)
|
||||
|
||||
var _ Backend = new(embedBackend)
|
||||
var _ pb.Backend_PredictStreamServer = new(embedBackendServerStream)
|
||||
|
||||
type embedBackend struct {
|
||||
s *server
|
||||
}
|
||||
|
||||
func (e *embedBackend) IsBusy() bool {
|
||||
return e.s.llm.Busy()
|
||||
}
|
||||
|
||||
func (e *embedBackend) HealthCheck(ctx context.Context) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (e *embedBackend) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
|
||||
return e.s.Embedding(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) {
|
||||
return e.s.Predict(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.LoadModel(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error {
|
||||
bs := &embedBackendServerStream{
|
||||
ctx: ctx,
|
||||
fn: f,
|
||||
}
|
||||
return e.s.PredictStream(in, bs)
|
||||
}
|
||||
|
||||
func (e *embedBackend) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.GenerateImage(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
return e.s.TTS(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
|
||||
r, err := e.s.AudioTranscription(ctx, in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tr := &schema.Result{}
|
||||
for _, s := range r.Segments {
|
||||
var tks []int
|
||||
for _, t := range s.Tokens {
|
||||
tks = append(tks, int(t))
|
||||
}
|
||||
tr.Segments = append(tr.Segments,
|
||||
schema.Segment{
|
||||
Text: s.Text,
|
||||
Id: int(s.Id),
|
||||
Start: time.Duration(s.Start),
|
||||
End: time.Duration(s.End),
|
||||
Tokens: tks,
|
||||
})
|
||||
}
|
||||
tr.Text = r.Text
|
||||
return tr, err
|
||||
}
|
||||
|
||||
func (e *embedBackend) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) {
|
||||
return e.s.TokenizeString(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) {
|
||||
return e.s.Status(ctx, &pb.HealthMessage{})
|
||||
}
|
||||
|
||||
type embedBackendServerStream struct {
|
||||
ctx context.Context
|
||||
fn func(s []byte)
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) Send(reply *pb.Reply) error {
|
||||
e.fn(reply.GetMessage())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) SetHeader(md metadata.MD) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) SendHeader(md metadata.MD) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) SetTrailer(md metadata.MD) {
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) Context() context.Context {
|
||||
return e.ctx
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) SendMsg(m any) error {
|
||||
if x, ok := m.(*pb.Reply); ok {
|
||||
return e.Send(x)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *embedBackendServerStream) RecvMsg(m any) error {
|
||||
return nil
|
||||
}
|
|
@ -181,3 +181,23 @@ func StartServer(address string, model LLM) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func RunServer(address string, model LLM) (func() error, error) {
|
||||
lis, err := net.Listen("tcp", address)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s := grpc.NewServer()
|
||||
pb.RegisterBackendServer(s, &server{llm: model})
|
||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||
if err = s.Serve(lis); err != nil {
|
||||
return func() error {
|
||||
return lis.Close()
|
||||
}, err
|
||||
}
|
||||
|
||||
return func() error {
|
||||
s.GracefulStop()
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue