mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
* Revert "fix(fncall): fix regression introduced in #1963 (#2048)" This reverts commit 6b06d4e0af.
* Revert "fix: action-tmate back to upstream, dead code removal (#2038)" This reverts commit fdec8a9d00.
* Revert "feat(grpc): return consumed token count and update response accordingly (#2035)" This reverts commit e843d7df0e.
* Revert "refactor: backend/service split, channel-based llm flow (#1963)" This reverts commit eed5706994.
* feat(grpc): return consumed token count and update response accordingly Fixes: #1920 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
af8c705ecd
commit
af9e5a2d05
52 changed files with 2295 additions and 3065 deletions
|
@ -10,7 +10,7 @@ import (
|
|||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
||||
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
||||
if so.SingleBackend {
|
||||
opts = append(opts, model.WithSingleActiveBackend())
|
||||
}
|
||||
|
@ -19,12 +19,12 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
|
|||
opts = append(opts, model.EnableParallelRequests)
|
||||
}
|
||||
|
||||
if bc.GRPC.Attempts != 0 {
|
||||
opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts))
|
||||
if c.GRPC.Attempts != 0 {
|
||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||
}
|
||||
|
||||
if bc.GRPC.AttemptsSleepTime != 0 {
|
||||
opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime))
|
||||
if c.GRPC.AttemptsSleepTime != 0 {
|
||||
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
|
||||
}
|
||||
|
||||
for k, v := range so.ExternalGRPCBackends {
|
||||
|
@ -34,7 +34,7 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
|
|||
return opts
|
||||
}
|
||||
|
||||
func getSeed(c *config.BackendConfig) int32 {
|
||||
func getSeed(c config.BackendConfig) int32 {
|
||||
seed := int32(*c.Seed)
|
||||
if seed == config.RAND_SEED {
|
||||
seed = rand.Int31()
|
||||
|
@ -43,7 +43,7 @@ func getSeed(c *config.BackendConfig) int32 {
|
|||
return seed
|
||||
}
|
||||
|
||||
func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
|
||||
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
b := 512
|
||||
if c.Batch != 0 {
|
||||
b = c.Batch
|
||||
|
@ -104,47 +104,47 @@ func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
|
|||
}
|
||||
}
|
||||
|
||||
func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions {
|
||||
func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
|
||||
promptCachePath := ""
|
||||
if bc.PromptCachePath != "" {
|
||||
p := filepath.Join(modelPath, bc.PromptCachePath)
|
||||
if c.PromptCachePath != "" {
|
||||
p := filepath.Join(modelPath, c.PromptCachePath)
|
||||
os.MkdirAll(filepath.Dir(p), 0755)
|
||||
promptCachePath = p
|
||||
}
|
||||
|
||||
return &pb.PredictOptions{
|
||||
Temperature: float32(*bc.Temperature),
|
||||
TopP: float32(*bc.TopP),
|
||||
NDraft: bc.NDraft,
|
||||
TopK: int32(*bc.TopK),
|
||||
Tokens: int32(*bc.Maxtokens),
|
||||
Threads: int32(*bc.Threads),
|
||||
PromptCacheAll: bc.PromptCacheAll,
|
||||
PromptCacheRO: bc.PromptCacheRO,
|
||||
Temperature: float32(*c.Temperature),
|
||||
TopP: float32(*c.TopP),
|
||||
NDraft: c.NDraft,
|
||||
TopK: int32(*c.TopK),
|
||||
Tokens: int32(*c.Maxtokens),
|
||||
Threads: int32(*c.Threads),
|
||||
PromptCacheAll: c.PromptCacheAll,
|
||||
PromptCacheRO: c.PromptCacheRO,
|
||||
PromptCachePath: promptCachePath,
|
||||
F16KV: *bc.F16,
|
||||
DebugMode: *bc.Debug,
|
||||
Grammar: bc.Grammar,
|
||||
NegativePromptScale: bc.NegativePromptScale,
|
||||
RopeFreqBase: bc.RopeFreqBase,
|
||||
RopeFreqScale: bc.RopeFreqScale,
|
||||
NegativePrompt: bc.NegativePrompt,
|
||||
Mirostat: int32(*bc.LLMConfig.Mirostat),
|
||||
MirostatETA: float32(*bc.LLMConfig.MirostatETA),
|
||||
MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
|
||||
Debug: *bc.Debug,
|
||||
StopPrompts: bc.StopWords,
|
||||
Repeat: int32(bc.RepeatPenalty),
|
||||
NKeep: int32(bc.Keep),
|
||||
Batch: int32(bc.Batch),
|
||||
IgnoreEOS: bc.IgnoreEOS,
|
||||
Seed: getSeed(bc),
|
||||
FrequencyPenalty: float32(bc.FrequencyPenalty),
|
||||
MLock: *bc.MMlock,
|
||||
MMap: *bc.MMap,
|
||||
MainGPU: bc.MainGPU,
|
||||
TensorSplit: bc.TensorSplit,
|
||||
TailFreeSamplingZ: float32(*bc.TFZ),
|
||||
TypicalP: float32(*bc.TypicalP),
|
||||
F16KV: *c.F16,
|
||||
DebugMode: *c.Debug,
|
||||
Grammar: c.Grammar,
|
||||
NegativePromptScale: c.NegativePromptScale,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NegativePrompt: c.NegativePrompt,
|
||||
Mirostat: int32(*c.LLMConfig.Mirostat),
|
||||
MirostatETA: float32(*c.LLMConfig.MirostatETA),
|
||||
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
|
||||
Debug: *c.Debug,
|
||||
StopPrompts: c.StopWords,
|
||||
Repeat: int32(c.RepeatPenalty),
|
||||
NKeep: int32(c.Keep),
|
||||
Batch: int32(c.Batch),
|
||||
IgnoreEOS: c.IgnoreEOS,
|
||||
Seed: getSeed(c),
|
||||
FrequencyPenalty: float32(c.FrequencyPenalty),
|
||||
MLock: *c.MMlock,
|
||||
MMap: *c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
TensorSplit: c.TensorSplit,
|
||||
TailFreeSamplingZ: float32(*c.TFZ),
|
||||
TypicalP: float32(*c.TypicalP),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue