mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat: queue up requests if not running parallel requests (#1296)
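Return a gRPC client that holds a lock around each call when it is not meant to run requests in parallel, so that concurrent requests are queued instead of executed simultaneously. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>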
This commit is contained in:
parent
2addb9f99a
commit
548959b50f
5 changed files with 64 additions and 16 deletions
|
@ -14,14 +14,17 @@ import (
|
|||
)
|
||||
|
||||
type Client struct {
|
||||
address string
|
||||
busy bool
|
||||
address string
|
||||
busy bool
|
||||
parallel bool
|
||||
sync.Mutex
|
||||
opMutex sync.Mutex
|
||||
}
|
||||
|
||||
func NewClient(address string) *Client {
|
||||
func NewClient(address string, parallel bool) *Client {
|
||||
return &Client{
|
||||
address: address,
|
||||
address: address,
|
||||
parallel: parallel,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,6 +41,10 @@ func (c *Client) setBusy(v bool) {
|
|||
}
|
||||
|
||||
func (c *Client) HealthCheck(ctx context.Context) bool {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -66,6 +73,10 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
|
|||
}
|
||||
|
||||
func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -79,6 +90,10 @@ func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...
|
|||
}
|
||||
|
||||
func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -92,6 +107,10 @@ func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grp
|
|||
}
|
||||
|
||||
func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -104,6 +123,10 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
|
|||
}
|
||||
|
||||
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -135,6 +158,10 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
|
|||
}
|
||||
|
||||
func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -147,6 +174,10 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
|
|||
}
|
||||
|
||||
func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -159,6 +190,10 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
|
|||
}
|
||||
|
||||
func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -191,6 +226,10 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
|
|||
}
|
||||
|
||||
func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
@ -209,6 +248,10 @@ func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts
|
|||
}
|
||||
|
||||
func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue