feat: add --single-active-backend to allow only one backend active at the time (#925)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-08-19 01:49:33 +02:00 committed by GitHub
parent 1079b18ff7
commit afdc0ebfd7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 238 additions and 164 deletions

View file

@ -4,20 +4,14 @@ import (
"context"
"fmt"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/hashicorp/go-multierror"
"github.com/hpcloud/tail"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
process "github.com/mudler/go-processmanager"
)
const (
@ -65,89 +59,6 @@ var AutoLoadBackends []string = []string{
PiperBackend,
}
// GetGRPCPID looks up the gRPC backend process registered under id and
// returns its PID parsed as an integer.
//
// It returns -1 and an error when no process is registered for id; otherwise
// the result (and any parse error) comes straight from strconv.Atoi.
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
	if proc, ok := ml.grpcProcesses[id]; ok {
		return strconv.Atoi(proc.PID)
	}
	return -1, fmt.Errorf("no grpc backend found for %s", id)
}
// GRPCProcessFilter is a predicate over a gRPC backend process. StopGRPC
// stops every registered process for which the filter returns true.
type GRPCProcessFilter = func(p *process.Process) bool
// includeAllProcesses is a GRPCProcessFilter that matches every process,
// regardless of its argument.
func includeAllProcesses(*process.Process) bool {
	return true
}
// StopGRPC calls Stop on every process in ml.grpcProcesses that the given
// filter accepts. Processes rejected by the filter are left untouched.
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) {
	for _, proc := range ml.grpcProcesses {
		if !filter(proc) {
			continue
		}
		proc.Stop()
	}
}
// StopAllGRPC stops every registered gRPC backend process by delegating to
// StopGRPC with a filter that accepts all processes.
func (ml *ModelLoader) StopAllGRPC() {
	ml.StopGRPC(includeAllProcesses)
}
// startProcess launches the gRPC backend binary at grpcProcess, registers the
// resulting process under id in ml.grpcProcesses, and forwards the backend's
// stdout and stderr to the debug log.
//
// The binary is first made executable (mode 0755), then started with
// "--addr serverAddress" and the current process environment. An error is
// returned if the chmod or the process start fails. Failures to tail the
// output files are only logged.
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error {
	// Make sure the process is executable
	if err := os.Chmod(grpcProcess, 0755); err != nil {
		return err
	}

	log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)

	log.Debug().Msgf("GRPC Service for %s will be running at: '%s'", id, serverAddress)

	grpcControlProcess := process.New(
		process.WithTemporaryStateDir(),
		process.WithName(grpcProcess),
		process.WithArgs("--addr", serverAddress),
		process.WithEnvironment(os.Environ()...),
	)

	// The process is registered before Run, so the entry exists even if the
	// start below fails.
	ml.grpcProcesses[id] = grpcControlProcess

	if err := grpcControlProcess.Run(); err != nil {
		return err
	}

	log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())

	// clean up process: stop the backend once this program receives an
	// interrupt or SIGTERM.
	go func() {
		c := make(chan os.Signal, 1)
		signal.Notify(c, os.Interrupt, syscall.SIGTERM)
		<-c
		grpcControlProcess.Stop()
	}()

	// The label is identical for every forwarded line, so build it once.
	label := strings.Join([]string{id, serverAddress}, "-")

	// forward tails the file at path and writes each line to the debug log,
	// tagged with the stream name ("stderr" or "stdout").
	forward := func(path, stream string) {
		t, err := tail.TailFile(path, tail.Config{Follow: true})
		if err != nil {
			log.Debug().Msgf("Could not tail %s", stream)
			// Without a tail handle there is nothing to read; ranging over
			// t.Lines here would dereference a nil pointer.
			return
		}
		for line := range t.Lines {
			log.Debug().Msgf("GRPC(%s): %s %s", label, stream, line.Text)
		}
	}
	go forward(grpcControlProcess.StderrPath(), "stderr")
	go forward(grpcControlProcess.StdoutPath(), "stdout")

	return nil
}
// grpcModel returns a loader function that starts the gRPC model process for
// the backend, loads the model, and returns a gRPC client for it.
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
@ -248,6 +159,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
backend := strings.ToLower(o.backendString)
if o.singleActiveBackend {
ml.mu.Lock()
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
ml.StopAllExcept(o.model)
ml.mu.Unlock()
}
// if an external backend is provided, use it
_, externalBackendExists := o.externalBackends[backend]
if externalBackendExists {
@ -274,14 +192,21 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
o := NewOptions(opts...)
// Is this really needed? BackendLoader already does this
ml.mu.Lock()
// Return earlier if we have a model already loaded
// (avoid looping through all the backends)
if m := ml.CheckIsLoaded(o.model); m != nil {
log.Debug().Msgf("Model '%s' already loaded", o.model)
ml.mu.Unlock()
return m, nil
}
// If we can have only one backend active, kill all the others (except external backends)
if o.singleActiveBackend {
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
ml.StopAllExcept(o.model)
}
ml.mu.Unlock()
var err error
// autoload also external backends