feat: queue up requests if not running parallel requests (#1296)

Return a gRPC client that handles a lock when requests are not meant to
run in parallel.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-11-16 22:20:16 +01:00 committed by GitHub
parent 2addb9f99a
commit 548959b50f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 16 deletions

View file

@ -121,7 +121,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
// Wait for the service to start up
ready := false
for i := 0; i < o.grpcAttempts; i++ {
if client.GRPC().HealthCheck(context.Background()) {
if client.GRPC(o.parallelRequests).HealthCheck(context.Background()) {
log.Debug().Msgf("GRPC Service Ready")
ready = true
break
@ -140,7 +140,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
res, err := client.GRPC().LoadModel(o.context, &options)
res, err := client.GRPC(o.parallelRequests).LoadModel(o.context, &options)
if err != nil {
return "", fmt.Errorf("could not load model: %w", err)
}
@ -154,11 +154,11 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
// resolveAddress returns the gRPC client to use for addr.
//
// When parallel is true, a client is built from addr on every call and is not
// stored. Otherwise the client is looked up in ml.grpcClients under the address
// string, created and stored on first use, and that stored instance is returned
// on later calls. The returned error is always nil.
//
// NOTE(review): this listing appears to be a rendered diff without +/- markers,
// so each addr.GRPC() line is presumably the removed line and the
// addr.GRPC(parallel) line right after it is its replacement — confirm against
// the real file.
// NOTE(review): ml.grpcClients is read and written here with no locking visible
// in this function — confirm that callers serialize access.
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.Client, error) {
if parallel {
return addr.GRPC(), nil
return addr.GRPC(parallel), nil
}
if _, ok := ml.grpcClients[string(addr)]; !ok {
ml.grpcClients[string(addr)] = addr.GRPC()
ml.grpcClients[string(addr)] = addr.GRPC(parallel)
}
return ml.grpcClients[string(addr)], nil
}

View file

@ -67,8 +67,8 @@ type ModelLoader struct {
type ModelAddress string
// GRPC builds a gRPC client for the address held in m by calling grpc.NewClient.
//
// The parallel flag is forwarded to grpc.NewClient unchanged. According to the
// commit description, a client created for non-parallel use handles a lock for
// its requests; that behavior lives in grpc.NewClient and is not visible here.
//
// NOTE(review): two signatures appear below because this listing looks like a
// rendered diff without +/- markers — presumably GRPC() is the removed form and
// GRPC(parallel bool) the added one; confirm against the real file.
func (m ModelAddress) GRPC() *grpc.Client {
return grpc.NewClient(string(m))
func (m ModelAddress) GRPC(parallel bool) *grpc.Client {
return grpc.NewClient(string(m), parallel)
}
func NewModelLoader(modelPath string) *ModelLoader {
@ -147,10 +147,16 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
}
func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
var client *grpc.Client
if m, ok := ml.models[s]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", s)
if c, ok := ml.grpcClients[s]; ok {
client = c
} else {
client = m.GRPC(false)
}
if !m.GRPC().HealthCheck(context.Background()) {
if !client.HealthCheck(context.Background()) {
log.Debug().Msgf("GRPC Model not responding: %s", s)
if !ml.grpcProcesses[s].IsAlive() {
log.Debug().Msgf("GRPC Process is not responding: %s", s)

View file

@ -17,7 +17,7 @@ import (
func (ml *ModelLoader) StopAllExcept(s string) {
ml.StopGRPC(func(id string, p *process.Process) bool {
if id != s {
for ml.models[id].GRPC().IsBusy() {
for ml.models[id].GRPC(false).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", id)
time.Sleep(2 * time.Second)
}