feat: more embedded models, coqui fixes, add model usage and description (#1556)
* feat: add model descriptions and usage
* remove default model gallery
* models: add embeddings and tts
* docs: update table
* docs: updates
* images: cleanup pip cache after install
* images: always run apt-get clean
* ux: improve gRPC connection errors
* ux: improve some messages
* fix: fix coqui when no AudioPath is passed by
* embedded: add more models
* Add usage
* Reorder table
This commit is contained in:
parent 0843fe6c65
commit e19d7226f8
21 changed files with 216 additions and 45 deletions
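The core API change in this commit is the gRPC client's HealthCheck signature, which now returns (bool, error) instead of a bare bool, so callers can report why a backend is unreachable. Below is a minimal, self-contained sketch of how a caller might consume the new contract; the healthChecker interface, the waitReady helper, and the attempt/delay values are illustrative assumptions, not code from the commit.

// Minimal sketch (not from the commit): polling the new
// HealthCheck contract, which returns (alive bool, err error).
package main

import (
	"context"
	"fmt"
	"time"
)

// healthChecker is a hypothetical stand-in for the client type.
type healthChecker interface {
	HealthCheck(ctx context.Context) (bool, error)
}

// waitReady follows the shape of the retry loop in grpcModel below:
// poll until the service answers, remembering the last error so it
// can be surfaced if the service never comes up.
func waitReady(c healthChecker, attempts int, delay time.Duration) error {
	var lastErr error
	for i := 0; i < attempts; i++ {
		alive, err := c.HealthCheck(context.Background())
		if alive {
			return nil
		}
		lastErr = err
		time.Sleep(delay)
	}
	return fmt.Errorf("service never became ready: %v", lastErr)
}

// alwaysDown is a stub implementation used only for demonstration.
type alwaysDown struct{}

func (alwaysDown) HealthCheck(ctx context.Context) (bool, error) {
	return false, fmt.Errorf("connection refused")
}

func main() {
	if err := waitReady(alwaysDown{}, 3, 10*time.Millisecond); err != nil {
		fmt.Println(err) // service never became ready: connection refused
	}
}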
@@ -50,7 +50,7 @@ func (c *Client) setBusy(v bool) {
 	c.Unlock()
 }
 
-func (c *Client) HealthCheck(ctx context.Context) bool {
+func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
@@ -59,8 +59,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 	defer c.setBusy(false)
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
-		fmt.Println(err)
-		return false
+		return false, err
 	}
 	defer conn.Close()
 	client := pb.NewBackendClient(conn)
@@ -71,15 +70,14 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
 
 	res, err := client.Health(ctx, &pb.HealthMessage{})
 	if err != nil {
-		fmt.Println(err)
-
-		return false
+		return false, err
 	}
 
 	if string(res.Message) == "OK" {
-		return true
+		return true, nil
 	}
-	return false
+
+	return false, fmt.Errorf("health check failed: %s", res.Message)
 }
 
 func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
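With this change the client no longer swallows failures via fmt.Println: both dial and RPC errors travel back to the caller, and a service that answers with anything other than "OK" now yields a descriptive error instead of a silent false.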
@@ -131,11 +131,15 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 	// Wait for the service to start up
 	ready := false
 	for i := 0; i < o.grpcAttempts; i++ {
-		if client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background()) {
+		alive, err := client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background())
+		if alive {
 			log.Debug().Msgf("GRPC Service Ready")
 			ready = true
 			break
 		}
+		if err != nil && i == o.grpcAttempts-1 {
+			log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error())
+		}
 		time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second)
 	}
 
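Note that the retry loop surfaces the health-check error only on the final attempt (i == o.grpcAttempts-1), keeping startup logs quiet while the backend is still warming up.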
@@ -176,7 +180,11 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
 func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	if o.model != "" {
+		log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
+	} else {
+		log.Info().Msgf("Loading model with backend %s", o.backendString)
+	}
 
 	backend := strings.ToLower(o.backendString)
 	if realBackend, exists := Aliases[backend]; exists {
@@ -239,7 +247,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
-	log.Info().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+
+	if o.model != "" {
+		log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
+	}
 
 	for _, b := range allBackendsToAutoLoad {
 		log.Info().Msgf("[%s] Attempting to load", b)
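The two loader hunks above are the "ux: improve some messages" part of the commit: both BackendLoader and GreedyLoader now log the model name only when o.model is non-empty, avoiding messages such as "Loading model '' with backend ...".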
@@ -171,9 +171,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
 	} else {
 		client = m.GRPC(false, ml.wd)
 	}
-
-	if !client.HealthCheck(context.Background()) {
-		log.Debug().Msgf("GRPC Model not responding: %s", s)
+	alive, err := client.HealthCheck(context.Background())
+	if !alive {
+		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
+		log.Warn().Msgf("Deleting the process in order to recreate it")
 		if !ml.grpcProcesses[s].IsAlive() {
 			log.Debug().Msgf("GRPC Process is not responding: %s", s)
 			// stop and delete the process, this forces to re-load the model and re-create again the service
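The CheckIsLoaded hunk above pairs the richer health check with process supervision: an unresponsive model whose process has died is stopped and deleted so the next request recreates the service. A minimal sketch of that recover-by-recreate pattern; the process type, its fields, and reapIfDead are hypothetical stand-ins for LocalAI's internals, not the project's actual types.

// Sketch only: hypothetical types illustrating the recovery step.
package main

import "fmt"

type process struct{ alive bool }

func (p *process) IsAlive() bool { return p.alive }
func (p *process) Stop()         { p.alive = false }

// reapIfDead mirrors the recovery step in CheckIsLoaded: when the
// health check fails and the managed process is gone, stop and
// forget it so the next load recreates the whole service.
func reapIfDead(procs map[string]*process, name string, healthy bool) {
	if healthy {
		return
	}
	if p, ok := procs[name]; ok && !p.IsAlive() {
		p.Stop()
		delete(procs, name)
		fmt.Printf("deleted dead process %q; it will be recreated on next load\n", name)
	}
}

func main() {
	procs := map[string]*process{"llama-cpp": {alive: false}}
	reapIfDead(procs, "llama-cpp", false)
	fmt.Println(len(procs)) // 0
}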