mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-30 15:35:01 +00:00
feat: add machine tag and inference timings (#4577)
* Add machine tag option and extraUsage option; passing extraUsage data through grpc-server -> proto -> endpoint is broken for now Signed-off-by: mintyleaf <mintyleafdev@gmail.com> * Remove redundant timing fields, fix timings output that was not working Signed-off-by: mintyleaf <mintyleafdev@gmail.com> * Use middleware for Machine-Tag only if a tag is specified Signed-off-by: mintyleaf <mintyleafdev@gmail.com> --------- Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
This commit is contained in:
parent
8027fdf1c7
commit
96f8ec0402
15 changed files with 137 additions and 48 deletions
|
@ -27,8 +27,10 @@ type LLMResponse struct {
|
|||
}
|
||||
|
||||
type TokenUsage struct {
|
||||
Prompt int
|
||||
Completion int
|
||||
Prompt int
|
||||
Completion int
|
||||
TimingPromptProcessing float64
|
||||
TimingTokenGeneration float64
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
|
@ -123,6 +125,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||
|
||||
tokenUsage.Prompt = int(reply.PromptTokens)
|
||||
tokenUsage.Completion = int(reply.Tokens)
|
||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||
|
||||
for len(partialRune) > 0 {
|
||||
r, size := utf8.DecodeRune(partialRune)
|
||||
|
@ -157,6 +161,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||
if tokenUsage.Completion == 0 {
|
||||
tokenUsage.Completion = int(reply.Tokens)
|
||||
}
|
||||
|
||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||
|
||||
return LLMResponse{
|
||||
Response: string(reply.Message),
|
||||
Usage: tokenUsage,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue