feat: more embedded models, coqui fixes, add model usage and description (#1556)

* feat: add model descriptions and usage

* remove default model gallery

* models: add embeddings and tts

* docs: update table

* docs: updates

* images: cleanup pip cache after install

* images: always run apt-get clean

* ux: improve gRPC connection errors

* ux: improve some messages

* fix: coqui failure when no AudioPath is passed

* embedded: add more models

* Add usage

* Reorder table
Ettore Di Giacinto 2024-01-08 00:37:02 +01:00 committed by GitHub
parent 0843fe6c65
commit e19d7226f8
21 changed files with 216 additions and 45 deletions


@@ -50,7 +50,7 @@ func (c *Client) setBusy(v bool) {
 	c.Unlock()
 }
 
-func (c *Client) HealthCheck(ctx context.Context) bool {
+func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
@@ -59,8 +59,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 	defer c.setBusy(false)
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
-		fmt.Println(err)
-		return false
+		return false, err
 	}
 	defer conn.Close()
 	client := pb.NewBackendClient(conn)
@@ -71,15 +70,14 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 	res, err := client.Health(ctx, &pb.HealthMessage{})
 	if err != nil {
-		fmt.Println(err)
-		return false
+		return false, err
 	}
 
 	if string(res.Message) == "OK" {
-		return true
+		return true, nil
 	}
-	return false
+	return false, fmt.Errorf("health check failed: %s", res.Message)
 }
 
 func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
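Note for callers: HealthCheck now returns (bool, error), so call sites must handle the second value. A minimal sketch of how a caller might poll the new signature (the interface, helper name, and parameters below are illustrative, not part of this commit):

package grpcdemo

import (
	"context"
	"fmt"
	"time"
)

// healthChecker matches the new HealthCheck signature introduced above.
type healthChecker interface {
	HealthCheck(ctx context.Context) (bool, error)
}

// waitHealthy polls HealthCheck until the backend reports alive,
// surfacing the last error instead of a bare boolean on failure.
func waitHealthy(ctx context.Context, c healthChecker, attempts int, delay time.Duration) error {
	var lastErr error
	for i := 0; i < attempts; i++ {
		alive, err := c.HealthCheck(ctx)
		if alive {
			return nil
		}
		lastErr = err
		time.Sleep(delay)
	}
	// %v prints "<nil>" safely if no attempt ever returned an error.
	return fmt.Errorf("backend never became healthy (last error: %v)", lastErr)
}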


@@ -131,11 +131,15 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 	// Wait for the service to start up
 	ready := false
 	for i := 0; i < o.grpcAttempts; i++ {
-		if client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background()) {
+		alive, err := client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background())
+		if alive {
 			log.Debug().Msgf("GRPC Service Ready")
 			ready = true
 			break
 		}
+		if err != nil && i == o.grpcAttempts-1 {
+			log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error())
+		}
 		time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second)
 	}
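The loop above retries with a fixed delay and only logs the health-check error on the final attempt. For comparison, a deadline-based variant of the same readiness wait (purely illustrative; not what this commit implements) could look like:

package grpcdemo

import (
	"context"
	"fmt"
	"time"
)

// waitReady polls hc until it reports alive or the timeout elapses,
// reporting the last health-check error on failure.
func waitReady(parent context.Context, hc func(context.Context) (bool, error), timeout, delay time.Duration) error {
	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()
	var lastErr error
	for {
		alive, err := hc(ctx)
		if alive {
			return nil
		}
		if err != nil {
			lastErr = err
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("gRPC service not ready: %v (last health error: %v)", ctx.Err(), lastErr)
		case <-time.After(delay):
			// retry after the polling interval
		}
	}
}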
@@ -176,7 +180,11 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
 func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	if o.model != "" {
+		log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	} else {
+		log.Info().Msgf("Loading model with backend %s", o.backendString)
+	}
 
 	backend := strings.ToLower(o.backendString)
 	if realBackend, exists := Aliases[backend]; exists {
@@ -239,7 +247,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
-	log.Info().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+
+	if o.model != "" {
+		log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+	}
 
 	for _, b := range allBackendsToAutoLoad {
 		log.Info().Msgf("[%s] Attempting to load", b)

@@ -171,9 +171,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
 	} else {
 		client = m.GRPC(false, ml.wd)
 	}
-	if !client.HealthCheck(context.Background()) {
-		log.Debug().Msgf("GRPC Model not responding: %s", s)
+	alive, err := client.HealthCheck(context.Background())
+	if !alive {
+		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
 		log.Warn().Msgf("Deleting the process in order to recreate it")
 		if !ml.grpcProcesses[s].IsAlive() {
 			log.Debug().Msgf("GRPC Process is not responding: %s", s)
 			// stop and delete the process, this forces to re-load the model and re-create again the service
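The pattern used here, treat a failed health check as a dead model, stop its process, and let the next request recreate it, can be sketched as follows (the interface, maps, and function name are illustrative stand-ins for the repo's real types):

package loaderdemo

// process abstracts the small part of the gRPC process handle this
// sketch needs; the real type lives in the repository.
type process interface {
	IsAlive() bool
	Stop() error
}

// recycleIfDead drops a non-responding model so the next request
// reloads it: stop the OS process if it no longer answers, then remove
// both the process handle and the cached model address.
func recycleIfDead(name string, alive bool, procs map[string]process, addrs map[string]string) {
	if alive {
		return
	}
	if p, ok := procs[name]; ok {
		if !p.IsAlive() {
			_ = p.Stop() // best effort; the process is already gone
		}
		delete(procs, name)
	}
	delete(addrs, name) // forces a fresh load on next use
}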