feat: track internally started models by ID (#3693)

* chore(refactor): track internally started models by ID

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Just extend options, no need to copy

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Improve debugging for rerankers failures

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify model loading with rerankers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Be more consistent when generating model options

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Uncommitted code

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make deleteProcess more idiomatic

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt CLI for sound generation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup threads definition

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Handle corner case where c.Seed is nil

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Consistently use ModelOptions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt new code to refactoring

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
This commit is contained in:
Ettore Di Giacinto 2024-10-02 08:55:58 +02:00 committed by GitHub
parent db704199dc
commit 0965c6cd68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 169 additions and 185 deletions

View file

@ -11,32 +11,65 @@ import (
"github.com/rs/zerolog/log"
)
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
name := c.Name
if name == "" {
name = c.Model
}
defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}
threads := 1
if c.Threads != nil {
threads = *c.Threads
}
if so.Threads != 0 {
threads = so.Threads
}
c.Threads = &threads
grpcOpts := grpcModelOpts(c)
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
if so.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
defOpts = append(defOpts, model.WithSingleActiveBackend())
}
if so.ParallelBackendRequests {
opts = append(opts, model.EnableParallelRequests)
defOpts = append(defOpts, model.EnableParallelRequests)
}
if c.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
if c.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range so.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
defOpts = append(defOpts, model.WithExternalBackend(k, v))
}
return opts
return append(defOpts, opts...)
}
func getSeed(c config.BackendConfig) int32 {
seed := int32(*c.Seed)
var seed int32 = config.RAND_SEED
if c.Seed != nil {
seed = int32(*c.Seed)
}
if seed == config.RAND_SEED {
seed = rand.Int31()
}
@ -44,11 +77,47 @@ func getSeed(c config.BackendConfig) int32 {
return seed
}
func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}
f16 := false
if c.F16 != nil {
f16 = *c.F16
}
embeddings := false
if c.Embeddings != nil {
embeddings = *c.Embeddings
}
lowVRAM := false
if c.LowVRAM != nil {
lowVRAM = *c.LowVRAM
}
mmap := false
if c.MMap != nil {
mmap = *c.MMap
}
ctxSize := 1024
if c.ContextSize != nil {
ctxSize = *c.ContextSize
}
mmlock := false
if c.MMlock != nil {
mmlock = *c.MMlock
}
nGPULayers := 9999999
if c.NGPULayers != nil {
nGPULayers = *c.NGPULayers
}
return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
@ -56,14 +125,14 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
F16Memory: *c.F16,
F16Memory: f16,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(*c.ContextSize),
ContextSize: int32(ctxSize),
Seed: getSeed(c),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
@ -85,16 +154,16 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
YarnBetaSlow: c.YarnBetaSlow,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
MLock: *c.MMlock,
MLock: mmlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: *c.Embeddings,
LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap,
Embeddings: embeddings,
LowVRAM: lowVRAM,
NGPULayers: int32(nGPULayers),
MMap: mmap,
MainGPU: c.MainGPU,
Threads: int32(*c.Threads),
TensorSplit: c.TensorSplit,