feat: queue up requests if not running parallel requests (#1296)

Return a gRPC client that acquires a lock around each call when the
backend is not meant to serve parallel requests, so concurrent requests
queue up instead of overlapping.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto authored on 2023-11-16 22:20:16 +01:00, committed by GitHub
parent 2addb9f99a
commit 548959b50f
5 changed files with 64 additions and 16 deletions
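
The mechanism behind the title: the per-model gRPC client serializes its
own calls. Below is a minimal sketch in Go of how such a client can queue
requests, assuming it guards every call with a sync.Mutex when parallel is
false; the opLock field and lockIfNeeded helper are invented names for
illustration, not necessarily the ones used in the LocalAI codebase:

	package grpc

	import "sync"

	// Client talks to one backend process. When parallel is false, opLock
	// is held for the duration of every call, so concurrent requests
	// queue up behind each other instead of reaching the backend at once.
	type Client struct {
		address  string
		parallel bool
		opLock   sync.Mutex
	}

	func NewClient(address string, parallel bool) *Client {
		return &Client{address: address, parallel: parallel}
	}

	// lockIfNeeded takes the lock only for non-parallel clients and
	// returns the matching unlock; parallel clients get a no-op.
	func (c *Client) lockIfNeeded() func() {
		if c.parallel {
			return func() {}
		}
		c.opLock.Lock()
		return c.opLock.Unlock
	}

Each RPC wrapper (HealthCheck and the like) would then begin with
defer c.lockIfNeeded()() before touching the backend.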


@@ -67,8 +67,8 @@ type ModelLoader struct {
 
 type ModelAddress string
 
-func (m ModelAddress) GRPC() *grpc.Client {
-	return grpc.NewClient(string(m))
+func (m ModelAddress) GRPC(parallel bool) *grpc.Client {
+	return grpc.NewClient(string(m), parallel)
 }
 
 func NewModelLoader(modelPath string) *ModelLoader {
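
With the new signature every call site must choose a concurrency mode
explicitly. A hypothetical call site (the address is invented for
illustration):

	addr := ModelAddress("127.0.0.1:50051")
	client := addr.GRPC(false)        // calls are queued behind the client's lock
	parallelClient := addr.GRPC(true) // calls may reach the backend concurrently
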
@@ -147,10 +147,16 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
 }
 
 func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
+	var client *grpc.Client
 	if m, ok := ml.models[s]; ok {
 		log.Debug().Msgf("Model already loaded in memory: %s", s)
+		if c, ok := ml.grpcClients[s]; ok {
+			client = c
+		} else {
+			client = m.GRPC(false)
+		}
 
-		if !m.GRPC().HealthCheck(context.Background()) {
+		if !client.HealthCheck(context.Background()) {
 			log.Debug().Msgf("GRPC Model not responding: %s", s)
 			if !ml.grpcProcesses[s].IsAlive() {
 				log.Debug().Msgf("GRPC Process is not responding: %s", s)