mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 06:25:00 +00:00
feat: add external grpc and model autoloading
This commit is contained in:
parent
1d2ae46ddc
commit
94916749c5
15 changed files with 429 additions and 192 deletions
|
@ -18,23 +18,15 @@ type Gallery struct {
|
|||
|
||||
// Installs a model from the gallery (galleryname@modelname)
|
||||
func InstallModelFromGallery(galleries []Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
|
||||
|
||||
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
|
||||
models, err := AvailableGalleryModels(galleries, basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
applyModel := func(model *GalleryModel) error {
|
||||
config, err := GetGalleryConfigFromURL(model.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
installName := model.Name
|
||||
if req.Name != "" {
|
||||
model.Name = req.Name
|
||||
installName = req.Name
|
||||
}
|
||||
|
||||
config.Files = append(config.Files, req.AdditionalFiles...)
|
||||
|
@ -45,20 +37,58 @@ func InstallModelFromGallery(galleries []Gallery, name string, basePath string,
|
|||
return err
|
||||
}
|
||||
|
||||
if err := InstallModel(basePath, model.Name, &config, model.Overrides, downloadStatus); err != nil {
|
||||
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
models, err := AvailableGalleryModels(galleries, basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
model, err := FindGallery(models, name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return applyModel(model)
|
||||
}
|
||||
|
||||
func FindGallery(models []*GalleryModel, name string) (*GalleryModel, error) {
|
||||
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
|
||||
for _, model := range models {
|
||||
if name == fmt.Sprintf("%s@%s", model.Gallery.Name, model.Name) {
|
||||
return applyModel(model)
|
||||
return model, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("no gallery found with name %q", name)
|
||||
}
|
||||
|
||||
// InstallModelFromGalleryByName loads a model from the gallery by specifying only the name (first match wins)
|
||||
func InstallModelFromGalleryByName(galleries []Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
|
||||
models, err := AvailableGalleryModels(galleries, basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
var model *GalleryModel
|
||||
for _, m := range models {
|
||||
if name == m.Name {
|
||||
model = m
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Errorf("no model found with name %q", name)
|
||||
if model == nil {
|
||||
return fmt.Errorf("no model found with name %q", name)
|
||||
}
|
||||
|
||||
return InstallModelFromGallery(galleries, fmt.Sprintf("%s@%s", model.Gallery.Name, model.Name), basePath, req, downloadStatus)
|
||||
}
|
||||
|
||||
// List available models
|
||||
|
|
|
@ -19,8 +19,6 @@ import (
|
|||
process "github.com/mudler/go-processmanager"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
const (
|
||||
LlamaBackend = "llama"
|
||||
BloomzBackend = "bloomz"
|
||||
|
@ -45,7 +43,6 @@ const (
|
|||
StableDiffusionBackend = "stablediffusion"
|
||||
PiperBackend = "piper"
|
||||
LCHuggingFaceBackend = "langchain-huggingface"
|
||||
//GGLLMFalconBackend = "falcon"
|
||||
)
|
||||
|
||||
var AutoLoadBackends []string = []string{
|
||||
|
@ -75,75 +72,116 @@ func (ml *ModelLoader) StopGRPC() {
|
|||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error {
|
||||
// Make sure the process is executable
|
||||
if err := os.Chmod(grpcProcess, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Loading GRPC Process", grpcProcess)
|
||||
|
||||
log.Debug().Msgf("GRPC Service for %s will be running at: '%s'", id, serverAddress)
|
||||
|
||||
grpcControlProcess := process.New(
|
||||
process.WithTemporaryStateDir(),
|
||||
process.WithName(grpcProcess),
|
||||
process.WithArgs("--addr", serverAddress))
|
||||
|
||||
ml.grpcProcesses[id] = grpcControlProcess
|
||||
|
||||
if err := grpcControlProcess.Run(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
|
||||
// clean up process
|
||||
go func() {
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
|
||||
<-c
|
||||
grpcControlProcess.Stop()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Could not tail stderr")
|
||||
}
|
||||
for line := range t.Lines {
|
||||
log.Debug().Msgf("GRPC(%s): stderr %s", strings.Join([]string{id, serverAddress}, "-"), line.Text)
|
||||
}
|
||||
}()
|
||||
go func() {
|
||||
t, err := tail.TailFile(grpcControlProcess.StdoutPath(), tail.Config{Follow: true})
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Could not tail stdout")
|
||||
}
|
||||
for line := range t.Lines {
|
||||
log.Debug().Msgf("GRPC(%s): stdout %s", strings.Join([]string{id, serverAddress}, "-"), line.Text)
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||
// It also loads the model
|
||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
|
||||
return func(s string) (*grpc.Client, error) {
|
||||
log.Debug().Msgf("Loading GRPC Model", backend, *o)
|
||||
|
||||
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
|
||||
var client *grpc.Client
|
||||
|
||||
// Check if the file exists
|
||||
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
|
||||
}
|
||||
|
||||
// Make sure the process is executable
|
||||
if err := os.Chmod(grpcProcess, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Loading GRPC Process", grpcProcess)
|
||||
port, err := freeport.GetFreePort()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
serverAddress := fmt.Sprintf("localhost:%d", port)
|
||||
|
||||
log.Debug().Msgf("GRPC Service for '%s' (%s) will be running at: '%s'", backend, o.modelFile, serverAddress)
|
||||
|
||||
grpcControlProcess := process.New(
|
||||
process.WithTemporaryStateDir(),
|
||||
process.WithName(grpcProcess),
|
||||
process.WithArgs("--addr", serverAddress))
|
||||
|
||||
ml.grpcProcesses[o.modelFile] = grpcControlProcess
|
||||
|
||||
if err := grpcControlProcess.Run(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// clean up process
|
||||
go func() {
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
|
||||
<-c
|
||||
grpcControlProcess.Stop()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
|
||||
getFreeAddress := func() (string, error) {
|
||||
port, err := freeport.GetFreePort()
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Could not tail stderr")
|
||||
return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||
}
|
||||
for line := range t.Lines {
|
||||
log.Debug().Msgf("GRPC(%s): stderr %s", strings.Join([]string{backend, o.modelFile, serverAddress}, "-"), line.Text)
|
||||
return fmt.Sprintf("127.0.0.1:%d", port), nil
|
||||
}
|
||||
|
||||
// Check if the backend is provided as external
|
||||
if uri, ok := o.externalBackends[backend]; ok {
|
||||
log.Debug().Msgf("Loading external backend: %s", uri)
|
||||
// check if uri is a file or a address
|
||||
if _, err := os.Stat(uri); err == nil {
|
||||
serverAddress, err := getFreeAddress()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||
}
|
||||
// Make sure the process is executable
|
||||
if err := ml.startProcess(uri, o.modelFile, serverAddress); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("GRPC Service Started")
|
||||
|
||||
client = grpc.NewClient(serverAddress)
|
||||
} else {
|
||||
// address
|
||||
client = grpc.NewClient(uri)
|
||||
}
|
||||
}()
|
||||
go func() {
|
||||
t, err := tail.TailFile(grpcControlProcess.StdoutPath(), tail.Config{Follow: true})
|
||||
} else {
|
||||
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
|
||||
// Check if the file exists
|
||||
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
|
||||
}
|
||||
|
||||
serverAddress, err := getFreeAddress()
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Could not tail stdout")
|
||||
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||
}
|
||||
for line := range t.Lines {
|
||||
log.Debug().Msgf("GRPC(%s): stderr %s", strings.Join([]string{backend, o.modelFile, serverAddress}, "-"), line.Text)
|
||||
|
||||
// Make sure the process is executable
|
||||
if err := ml.startProcess(grpcProcess, o.modelFile, serverAddress); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug().Msgf("GRPC Service Started")
|
||||
log.Debug().Msgf("GRPC Service Started")
|
||||
|
||||
client := grpc.NewClient(serverAddress)
|
||||
client = grpc.NewClient(serverAddress)
|
||||
}
|
||||
|
||||
// Wait for the service to start up
|
||||
ready := false
|
||||
|
@ -158,11 +196,6 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
|
|||
|
||||
if !ready {
|
||||
log.Debug().Msgf("GRPC Service NOT ready")
|
||||
log.Debug().Msgf("Alive: ", grpcControlProcess.IsAlive())
|
||||
log.Debug().Msgf(fmt.Sprintf("GRPC Service Exitcode:"))
|
||||
|
||||
log.Debug().Msgf(grpcControlProcess.ExitCode())
|
||||
|
||||
return nil, fmt.Errorf("grpc service not ready")
|
||||
}
|
||||
|
||||
|
@ -189,6 +222,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
|
|||
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
|
||||
|
||||
backend := strings.ToLower(o.backendString)
|
||||
|
||||
// if an external backend is provided, use it
|
||||
_, externalBackendExists := o.externalBackends[backend]
|
||||
if externalBackendExists {
|
||||
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
|
||||
}
|
||||
|
||||
switch backend {
|
||||
case LlamaBackend, LlamaGrammarBackend, GPTJBackend, DollyBackend,
|
||||
MPTBackend, Gpt2Backend, FalconBackend,
|
||||
|
@ -209,8 +249,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
|
|||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
log.Debug().Msgf("Loading model '%s' greedly", o.modelFile)
|
||||
|
||||
// Is this really needed? BackendLoader already does this
|
||||
ml.mu.Lock()
|
||||
if m := ml.checkIsLoaded(o.modelFile); m != nil {
|
||||
|
@ -221,16 +259,29 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
|||
ml.mu.Unlock()
|
||||
var err error
|
||||
|
||||
for _, b := range AutoLoadBackends {
|
||||
log.Debug().Msgf("[%s] Attempting to load", b)
|
||||
// autoload also external backends
|
||||
allBackendsToAutoLoad := []string{}
|
||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, AutoLoadBackends...)
|
||||
for _, b := range o.externalBackends {
|
||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
|
||||
}
|
||||
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.modelFile, strings.Join(allBackendsToAutoLoad, ", "))
|
||||
|
||||
model, modelerr := ml.BackendLoader(
|
||||
for _, b := range allBackendsToAutoLoad {
|
||||
log.Debug().Msgf("[%s] Attempting to load", b)
|
||||
options := []Option{
|
||||
WithBackendString(b),
|
||||
WithModelFile(o.modelFile),
|
||||
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
|
||||
WithThreads(o.threads),
|
||||
WithAssetDir(o.assetDir),
|
||||
)
|
||||
}
|
||||
|
||||
for k, v := range o.externalBackends {
|
||||
options = append(options, WithExternalBackend(k, v))
|
||||
}
|
||||
|
||||
model, modelerr := ml.BackendLoader(options...)
|
||||
if modelerr == nil && model != nil {
|
||||
log.Debug().Msgf("[%s] Loads OK", b)
|
||||
return model, nil
|
||||
|
|
|
@ -14,10 +14,21 @@ type Options struct {
|
|||
context context.Context
|
||||
|
||||
gRPCOptions *pb.ModelOptions
|
||||
|
||||
externalBackends map[string]string
|
||||
}
|
||||
|
||||
type Option func(*Options)
|
||||
|
||||
func WithExternalBackend(name string, uri string) Option {
|
||||
return func(o *Options) {
|
||||
if o.externalBackends == nil {
|
||||
o.externalBackends = make(map[string]string)
|
||||
}
|
||||
o.externalBackends[name] = uri
|
||||
}
|
||||
}
|
||||
|
||||
func WithBackendString(backend string) Option {
|
||||
return func(o *Options) {
|
||||
o.backendString = backend
|
||||
|
|
37
pkg/utils/logging.go
Normal file
37
pkg/utils/logging.go
Normal file
|
@ -0,0 +1,37 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
var (
	// lastProgress is the time DisplayDownloadFunction last logged progress.
	lastProgress = time.Now()
	// startTime is the reference point used for the ETA estimate.
	startTime = time.Now()
)

// ResetDownloadTimers sets both download timers to the current time.
func ResetDownloadTimers() {
	lastProgress, startTime = time.Now(), time.Now()
}
|
||||
|
||||
func DisplayDownloadFunction(fileName string, current string, total string, percentage float64) {
|
||||
currentTime := time.Now()
|
||||
|
||||
if currentTime.Sub(lastProgress) >= 5*time.Second {
|
||||
|
||||
lastProgress = currentTime
|
||||
|
||||
// calculate ETA based on percentage and elapsed time
|
||||
var eta time.Duration
|
||||
if percentage > 0 {
|
||||
elapsed := currentTime.Sub(startTime)
|
||||
eta = time.Duration(float64(elapsed)*(100/percentage) - float64(elapsed))
|
||||
}
|
||||
|
||||
if total != "" {
|
||||
log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%) ETA: %s", fileName, current, total, percentage, eta)
|
||||
} else {
|
||||
log.Debug().Msgf("Downloading: %s", current)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue