feat: more embedded models, coqui fixes, add model usage and description (#1556)

* feat: add model descriptions and usage

* remove default model gallery

* models: add embeddings and tts

* docs: update table

* docs: updates

* images: cleanup pip cache after install

* images: always run apt-get clean

* ux: improve gRPC connection errors

* ux: improve some messages

* fix: coqui failure when no AudioPath is passed

* embedded: add more models

* Add usage

* Reorder table
Ettore Di Giacinto 2024-01-08 00:37:02 +01:00 committed by GitHub
parent 0843fe6c65
commit e19d7226f8
21 changed files with 216 additions and 45 deletions


@@ -50,7 +50,7 @@ func (c *Client) setBusy(v bool) {
 	c.Unlock()
 }
 
-func (c *Client) HealthCheck(ctx context.Context) bool {
+func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
@@ -59,8 +59,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 	defer c.setBusy(false)
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
-		fmt.Println(err)
-		return false
+		return false, err
 	}
 	defer conn.Close()
 	client := pb.NewBackendClient(conn)
@@ -71,15 +70,14 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 	res, err := client.Health(ctx, &pb.HealthMessage{})
 	if err != nil {
-		fmt.Println(err)
-		return false
+		return false, err
 	}
 
 	if string(res.Message) == "OK" {
-		return true
+		return true, nil
 	}
-	return false
+	return false, fmt.Errorf("health check failed: %s", res.Message)
 }
 
 func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
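Note for callers: HealthCheck now returns (bool, error), so call sites must handle the second value. A minimal sketch of how a caller might poll the new signature (the interface, helper name, and parameters below are illustrative, not part of this commit):

package grpcdemo

import (
	"context"
	"fmt"
	"time"
)

// healthChecker matches the new HealthCheck signature introduced above.
type healthChecker interface {
	HealthCheck(ctx context.Context) (bool, error)
}

// waitHealthy polls HealthCheck until the backend reports alive,
// surfacing the last error instead of a bare boolean on failure.
func waitHealthy(ctx context.Context, c healthChecker, attempts int, delay time.Duration) error {
	var lastErr error
	for i := 0; i < attempts; i++ {
		alive, err := c.HealthCheck(ctx)
		if alive {
			return nil
		}
		lastErr = err
		time.Sleep(delay)
	}
	// %v prints "<nil>" safely if no attempt ever returned an error.
	return fmt.Errorf("backend never became healthy (last error: %v)", lastErr)
}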


@@ -131,11 +131,15 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 	// Wait for the service to start up
 	ready := false
 	for i := 0; i < o.grpcAttempts; i++ {
-		if client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background()) {
+		alive, err := client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background())
+		if alive {
 			log.Debug().Msgf("GRPC Service Ready")
 			ready = true
 			break
 		}
+		if err != nil && i == o.grpcAttempts-1 {
+			log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error())
+		}
 		time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second)
 	}
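The loop above retries with a fixed delay and only logs the health-check error on the final attempt. For comparison, a deadline-based variant of the same readiness wait (purely illustrative; not what this commit implements) could look like:

package grpcdemo

import (
	"context"
	"fmt"
	"time"
)

// waitReady polls hc until it reports alive or the timeout elapses,
// reporting the last health-check error on failure.
func waitReady(parent context.Context, hc func(context.Context) (bool, error), timeout, delay time.Duration) error {
	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()
	var lastErr error
	for {
		alive, err := hc(ctx)
		if alive {
			return nil
		}
		if err != nil {
			lastErr = err
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("gRPC service not ready: %v (last health error: %v)", ctx.Err(), lastErr)
		case <-time.After(delay):
			// retry after the polling interval
		}
	}
}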
@@ -176,7 +180,11 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
 func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	if o.model != "" {
+		log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	} else {
+		log.Info().Msgf("Loading model with backend %s", o.backendString)
+	}
 
 	backend := strings.ToLower(o.backendString)
 	if realBackend, exists := Aliases[backend]; exists {
@@ -239,7 +247,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
-	log.Info().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+
+	if o.model != "" {
+		log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+	}
 
 	for _, b := range allBackendsToAutoLoad {
 		log.Info().Msgf("[%s] Attempting to load", b)

@@ -171,9 +171,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
 	} else {
 		client = m.GRPC(false, ml.wd)
 	}
-	if !client.HealthCheck(context.Background()) {
-		log.Debug().Msgf("GRPC Model not responding: %s", s)
+	alive, err := client.HealthCheck(context.Background())
+	if !alive {
+		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
 		log.Warn().Msgf("Deleting the process in order to recreate it")
 		if !ml.grpcProcesses[s].IsAlive() {
 			log.Debug().Msgf("GRPC Process is not responding: %s", s)
 			// stop and delete the process, this forces to re-load the model and re-create again the service
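The pattern used here, treat a failed health check as a dead model, stop its process, and let the next request recreate it, can be sketched as follows (the interface, maps, and function name are illustrative stand-ins for the repo's real types):

package loaderdemo

// process abstracts the small part of the gRPC process handle this
// sketch needs; the real type lives in the repository.
type process interface {
	IsAlive() bool
	Stop() error
}

// recycleIfDead drops a non-responding model so the next request
// reloads it: stop the OS process if it no longer answers, then remove
// both the process handle and the cached model address.
func recycleIfDead(name string, alive bool, procs map[string]process, addrs map[string]string) {
	if alive {
		return
	}
	if p, ok := procs[name]; ok {
		if !p.IsAlive() {
			_ = p.Stop() // best effort; the process is already gone
		}
		delete(procs, name)
	}
	delete(addrs, name) // forces a fresh load on next use
}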