* Revert "fix(fncall): fix regression introduced in #1963 (#2048)"

This reverts commit 6b06d4e0af.

* Revert "fix: action-tmate back to upstream, dead code removal (#2038)"

This reverts commit fdec8a9d00.

* Revert "feat(grpc): return consumed token count and update response accordingly (#2035)"

This reverts commit e843d7df0e.

* Revert "refactor: backend/service split, channel-based llm flow (#1963)"

This reverts commit eed5706994.

* feat(grpc): return consumed token count and update response accordingly

Fixes: #1920

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-04-17 23:33:49 +02:00 committed by GitHub
parent af8c705ecd
commit af9e5a2d05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
52 changed files with 2295 additions and 3065 deletions

View file

@ -10,7 +10,7 @@ import (
model "github.com/go-skynet/LocalAI/pkg/model"
)
func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
if so.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
}
@ -19,12 +19,12 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
opts = append(opts, model.EnableParallelRequests)
}
if bc.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(bc.GRPC.Attempts))
if c.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
if bc.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(bc.GRPC.AttemptsSleepTime))
if c.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range so.ExternalGRPCBackends {
@ -34,7 +34,7 @@ func modelOpts(bc *config.BackendConfig, so *config.ApplicationConfig, opts []mo
return opts
}
func getSeed(c *config.BackendConfig) int32 {
func getSeed(c config.BackendConfig) int32 {
seed := int32(*c.Seed)
if seed == config.RAND_SEED {
seed = rand.Int31()
@ -43,7 +43,7 @@ func getSeed(c *config.BackendConfig) int32 {
return seed
}
func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
@ -104,47 +104,47 @@ func gRPCModelOpts(c *config.BackendConfig) *pb.ModelOptions {
}
}
func gRPCPredictOpts(bc *config.BackendConfig, modelPath string) *pb.PredictOptions {
func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
promptCachePath := ""
if bc.PromptCachePath != "" {
p := filepath.Join(modelPath, bc.PromptCachePath)
if c.PromptCachePath != "" {
p := filepath.Join(modelPath, c.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
promptCachePath = p
}
return &pb.PredictOptions{
Temperature: float32(*bc.Temperature),
TopP: float32(*bc.TopP),
NDraft: bc.NDraft,
TopK: int32(*bc.TopK),
Tokens: int32(*bc.Maxtokens),
Threads: int32(*bc.Threads),
PromptCacheAll: bc.PromptCacheAll,
PromptCacheRO: bc.PromptCacheRO,
Temperature: float32(*c.Temperature),
TopP: float32(*c.TopP),
NDraft: c.NDraft,
TopK: int32(*c.TopK),
Tokens: int32(*c.Maxtokens),
Threads: int32(*c.Threads),
PromptCacheAll: c.PromptCacheAll,
PromptCacheRO: c.PromptCacheRO,
PromptCachePath: promptCachePath,
F16KV: *bc.F16,
DebugMode: *bc.Debug,
Grammar: bc.Grammar,
NegativePromptScale: bc.NegativePromptScale,
RopeFreqBase: bc.RopeFreqBase,
RopeFreqScale: bc.RopeFreqScale,
NegativePrompt: bc.NegativePrompt,
Mirostat: int32(*bc.LLMConfig.Mirostat),
MirostatETA: float32(*bc.LLMConfig.MirostatETA),
MirostatTAU: float32(*bc.LLMConfig.MirostatTAU),
Debug: *bc.Debug,
StopPrompts: bc.StopWords,
Repeat: int32(bc.RepeatPenalty),
NKeep: int32(bc.Keep),
Batch: int32(bc.Batch),
IgnoreEOS: bc.IgnoreEOS,
Seed: getSeed(bc),
FrequencyPenalty: float32(bc.FrequencyPenalty),
MLock: *bc.MMlock,
MMap: *bc.MMap,
MainGPU: bc.MainGPU,
TensorSplit: bc.TensorSplit,
TailFreeSamplingZ: float32(*bc.TFZ),
TypicalP: float32(*bc.TypicalP),
F16KV: *c.F16,
DebugMode: *c.Debug,
Grammar: c.Grammar,
NegativePromptScale: c.NegativePromptScale,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NegativePrompt: c.NegativePrompt,
Mirostat: int32(*c.LLMConfig.Mirostat),
MirostatETA: float32(*c.LLMConfig.MirostatETA),
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
Debug: *c.Debug,
StopPrompts: c.StopWords,
Repeat: int32(c.RepeatPenalty),
NKeep: int32(c.Keep),
Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS,
Seed: getSeed(c),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: *c.MMlock,
MMap: *c.MMap,
MainGPU: c.MainGPU,
TensorSplit: c.TensorSplit,
TailFreeSamplingZ: float32(*c.TFZ),
TypicalP: float32(*c.TypicalP),
}
}