mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 14:35:00 +00:00
feat: initial watchdog implementation (#1341)
* feat: initial watchdog implementation Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> * fiuxups * Add more output * wip: idletime checker * wire idle watchdog checks * enlarge watchdog time window * small fixes * Use stopmodel * Always delete process Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
9482acfdfc
commit
824612f1b4
10 changed files with 341 additions and 13 deletions
|
@ -121,7 +121,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||
// Wait for the service to start up
|
||||
ready := false
|
||||
for i := 0; i < o.grpcAttempts; i++ {
|
||||
if client.GRPC(o.parallelRequests).HealthCheck(context.Background()) {
|
||||
if client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background()) {
|
||||
log.Debug().Msgf("GRPC Service Ready")
|
||||
ready = true
|
||||
break
|
||||
|
@ -140,7 +140,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||
|
||||
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
|
||||
|
||||
res, err := client.GRPC(o.parallelRequests).LoadModel(o.context, &options)
|
||||
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("could not load model: %w", err)
|
||||
}
|
||||
|
@ -154,11 +154,11 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||
|
||||
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.Client, error) {
|
||||
if parallel {
|
||||
return addr.GRPC(parallel), nil
|
||||
return addr.GRPC(parallel, ml.wd), nil
|
||||
}
|
||||
|
||||
if _, ok := ml.grpcClients[string(addr)]; !ok {
|
||||
ml.grpcClients[string(addr)] = addr.GRPC(parallel)
|
||||
ml.grpcClients[string(addr)] = addr.GRPC(parallel, ml.wd)
|
||||
}
|
||||
return ml.grpcClients[string(addr)], nil
|
||||
}
|
||||
|
|
|
@ -63,12 +63,17 @@ type ModelLoader struct {
|
|||
models map[string]ModelAddress
|
||||
grpcProcesses map[string]*process.Process
|
||||
templates map[TemplateType]map[string]*template.Template
|
||||
wd *WatchDog
|
||||
}
|
||||
|
||||
type ModelAddress string
|
||||
|
||||
func (m ModelAddress) GRPC(parallel bool) *grpc.Client {
|
||||
return grpc.NewClient(string(m), parallel)
|
||||
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client {
|
||||
enableWD := false
|
||||
if wd != nil {
|
||||
enableWD = true
|
||||
}
|
||||
return grpc.NewClient(string(m), parallel, wd, enableWD)
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
|
@ -79,10 +84,15 @@ func NewModelLoader(modelPath string) *ModelLoader {
|
|||
templates: make(map[TemplateType]map[string]*template.Template),
|
||||
grpcProcesses: make(map[string]*process.Process),
|
||||
}
|
||||
|
||||
nml.initializeTemplateMap()
|
||||
return nml
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
|
||||
ml.wd = wd
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
||||
return existsInPath(ml.ModelPath, s)
|
||||
}
|
||||
|
@ -139,11 +149,17 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
|
|||
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
return ml.StopModel(modelName)
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) StopModel(modelName string) error {
|
||||
defer ml.deleteProcess(modelName)
|
||||
if _, ok := ml.models[modelName]; !ok {
|
||||
return fmt.Errorf("model %s not found", modelName)
|
||||
}
|
||||
|
||||
return ml.deleteProcess(modelName)
|
||||
return nil
|
||||
//return ml.deleteProcess(modelName)
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
|
||||
|
@ -153,7 +169,7 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
|
|||
if c, ok := ml.grpcClients[s]; ok {
|
||||
client = c
|
||||
} else {
|
||||
client = m.GRPC(false)
|
||||
client = m.GRPC(false, ml.wd)
|
||||
}
|
||||
|
||||
if !client.HealthCheck(context.Background()) {
|
||||
|
|
|
@ -17,7 +17,7 @@ import (
|
|||
func (ml *ModelLoader) StopAllExcept(s string) {
|
||||
ml.StopGRPC(func(id string, p *process.Process) bool {
|
||||
if id != s {
|
||||
for ml.models[id].GRPC(false).IsBusy() {
|
||||
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
|
||||
log.Debug().Msgf("%s busy. Waiting.", id)
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
|
@ -80,6 +80,11 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||
process.WithEnvironment(os.Environ()...),
|
||||
)
|
||||
|
||||
if ml.wd != nil {
|
||||
ml.wd.Add(serverAddress, grpcControlProcess)
|
||||
ml.wd.AddAddressModelMap(serverAddress, id)
|
||||
}
|
||||
|
||||
ml.grpcProcesses[id] = grpcControlProcess
|
||||
|
||||
if err := grpcControlProcess.Run(); err != nil {
|
||||
|
|
155
pkg/model/watchdog.go
Normal file
155
pkg/model/watchdog.go
Normal file
|
@ -0,0 +1,155 @@
|
|||
package model
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
process "github.com/mudler/go-processmanager"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// All GRPC Clients created by ModelLoader should have an associated injected
|
||||
// watchdog that will keep track of the state of each backend (busy or not)
|
||||
// and for how much time it has been busy.
|
||||
// If a backend is busy for too long, the watchdog will kill the process and
|
||||
// force a reload of the model
|
||||
// The watchdog runs as a separate go routine,
|
||||
// and the GRPC client talks to it via a channel to send status updates
|
||||
|
||||
type WatchDog struct {
|
||||
sync.Mutex
|
||||
timetable map[string]time.Time
|
||||
idleTime map[string]time.Time
|
||||
timeout, idletimeout time.Duration
|
||||
addressMap map[string]*process.Process
|
||||
addressModelMap map[string]string
|
||||
pm ProcessManager
|
||||
stop chan bool
|
||||
|
||||
busyCheck, idleCheck bool
|
||||
}
|
||||
|
||||
// ProcessManager is what the WatchDog needs from its owner: the ability to
// stop a model by name.
type ProcessManager interface {
	// StopModel is called by the watchdog checks with the model name that was
	// registered for a backend address. A non-nil error is only logged by the
	// watchdog.
	StopModel(modelName string) error
}
|
||||
|
||||
func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool) *WatchDog {
|
||||
return &WatchDog{
|
||||
timeout: timeoutBusy,
|
||||
idletimeout: timeoutIdle,
|
||||
pm: pm,
|
||||
timetable: make(map[string]time.Time),
|
||||
idleTime: make(map[string]time.Time),
|
||||
addressMap: make(map[string]*process.Process),
|
||||
busyCheck: busy,
|
||||
idleCheck: idle,
|
||||
addressModelMap: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func (wd *WatchDog) Shutdown() {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
wd.stop <- true
|
||||
}
|
||||
|
||||
func (wd *WatchDog) AddAddressModelMap(address string, model string) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
wd.addressModelMap[address] = model
|
||||
|
||||
}
|
||||
func (wd *WatchDog) Add(address string, p *process.Process) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
wd.addressMap[address] = p
|
||||
}
|
||||
|
||||
func (wd *WatchDog) Mark(address string) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
wd.timetable[address] = time.Now()
|
||||
delete(wd.idleTime, address)
|
||||
}
|
||||
|
||||
func (wd *WatchDog) UnMark(ModelAddress string) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
delete(wd.timetable, ModelAddress)
|
||||
wd.idleTime[ModelAddress] = time.Now()
|
||||
}
|
||||
|
||||
func (wd *WatchDog) Run() {
|
||||
log.Info().Msg("[WatchDog] starting watchdog")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-wd.stop:
|
||||
log.Info().Msg("[WatchDog] Stopping watchdog")
|
||||
return
|
||||
case <-time.After(30 * time.Second):
|
||||
if !wd.busyCheck && !wd.idleCheck {
|
||||
log.Info().Msg("[WatchDog] No checks enabled, stopping watchdog")
|
||||
return
|
||||
}
|
||||
if wd.busyCheck {
|
||||
wd.checkBusy()
|
||||
}
|
||||
if wd.idleCheck {
|
||||
wd.checkIdle()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wd *WatchDog) checkIdle() {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
log.Debug().Msg("[WatchDog] Watchdog checks for idle connections")
|
||||
for address, t := range wd.idleTime {
|
||||
log.Debug().Msgf("[WatchDog] %s: idle connection", address)
|
||||
if time.Since(t) > wd.idletimeout {
|
||||
log.Warn().Msgf("[WatchDog] Address %s is idle for too long, killing it", address)
|
||||
p, ok := wd.addressModelMap[address]
|
||||
if ok {
|
||||
if err := wd.pm.StopModel(p); err != nil {
|
||||
log.Error().Msgf("[watchdog] Error shutting down model %s: %v", p, err)
|
||||
}
|
||||
delete(wd.idleTime, address)
|
||||
delete(wd.addressModelMap, address)
|
||||
delete(wd.addressMap, address)
|
||||
} else {
|
||||
log.Warn().Msgf("[WatchDog] Address %s unresolvable", address)
|
||||
delete(wd.idleTime, address)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wd *WatchDog) checkBusy() {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
log.Debug().Msg("[WatchDog] Watchdog checks for busy connections")
|
||||
|
||||
for address, t := range wd.timetable {
|
||||
log.Debug().Msgf("[WatchDog] %s: active connection", address)
|
||||
|
||||
if time.Since(t) > wd.timeout {
|
||||
|
||||
model, ok := wd.addressModelMap[address]
|
||||
if ok {
|
||||
log.Warn().Msgf("[WatchDog] Model %s is busy for too long, killing it", model)
|
||||
if err := wd.pm.StopModel(model); err != nil {
|
||||
log.Error().Msgf("[watchdog] Error shutting down model %s: %v", model, err)
|
||||
}
|
||||
delete(wd.timetable, address)
|
||||
delete(wd.addressModelMap, address)
|
||||
delete(wd.addressMap, address)
|
||||
} else {
|
||||
log.Warn().Msgf("[WatchDog] Address %s unresolvable", address)
|
||||
delete(wd.timetable, address)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue