feat: add machine tag and inference timings (#4577)

* Add machine tag option and extraUsage option; grpc-server -> proto -> endpoint extraUsage data is broken for now

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* remove redundant timing fields, fix broken timings output

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* use middleware for Machine-Tag only if tag is specified (see the sketch after this message)

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

---------

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
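
For context, a minimal sketch of the conditional middleware registration described in the last bullet, assuming a fiber app and a plain string tag. The helper name and config wiring here are illustrative assumptions, not the exact code from this PR:

    package main

    import "github.com/gofiber/fiber/v2"

    // setupMachineTag is a hypothetical helper: it registers the Machine-Tag
    // middleware only when a tag is actually configured, so untagged
    // deployments skip the per-request header write entirely.
    func setupMachineTag(app *fiber.App, machineTag string) {
        if machineTag == "" {
            return // no tag specified: no middleware
        }
        app.Use(func(c *fiber.Ctx) error {
            // Stamp every response so callers can tell which instance served it.
            c.Response().Header.Set("Machine-Tag", machineTag)
            return c.Next()
        })
    }

    func main() {
        app := fiber.New()
        setupMachineTag(app, "node-1")
        app.Listen(":8080")
    }
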
mintyleaf 2025-01-17 20:05:58 +04:00 committed by GitHub
parent 8027fdf1c7
commit 96f8ec0402
15 changed files with 137 additions and 48 deletions


@@ -25,6 +25,9 @@ import (
 func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
+		// Opt-in extra usage flag
+		extraUsage := c.Get("LocalAI-Extra-Usage", "") != ""
+
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
@@ -61,8 +64,20 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 			totalTokenUsage.Prompt += tokenUsage.Prompt
 			totalTokenUsage.Completion += tokenUsage.Completion
+			totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration
+			totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing

 			result = append(result, r...)
 		}

+		usage := schema.OpenAIUsage{
+			PromptTokens:     totalTokenUsage.Prompt,
+			CompletionTokens: totalTokenUsage.Completion,
+			TotalTokens:      totalTokenUsage.Prompt + totalTokenUsage.Completion,
+		}
+		if extraUsage {
+			usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration
+			usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing
+		}
+
 		id := uuid.New().String()
 		created := int(time.Now().Unix())
@@ -72,11 +87,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "edit",
-			Usage: schema.OpenAIUsage{
-				PromptTokens:     totalTokenUsage.Prompt,
-				CompletionTokens: totalTokenUsage.Completion,
-				TotalTokens:      totalTokenUsage.Prompt + totalTokenUsage.Completion,
-			},
+			Usage: usage,
 		}

 		jsonResult, _ := json.Marshal(resp)
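
For reference, a hedged client-side example of opting into the new timing fields: the handler above only checks that the LocalAI-Extra-Usage header is present and non-empty, so any value enables it. The URL, model name, and request body below are placeholders following the OpenAI edits shape, and the JSON key names for the timing fields are not shown in this hunk, so the example just prints the raw response:

    package main

    import (
        "bytes"
        "fmt"
        "io"
        "net/http"
    )

    func main() {
        // Placeholder endpoint and payload; adjust for your deployment.
        body := []byte(`{"model":"my-model","instruction":"fix spelling","input":"helo wrold"}`)
        req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/v1/edits", bytes.NewReader(body))
        if err != nil {
            panic(err)
        }
        req.Header.Set("Content-Type", "application/json")
        // Opt in to extra usage timings: any non-empty header value is enough.
        req.Header.Set("LocalAI-Extra-Usage", "1")

        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        // With the header set, the usage object should additionally carry the
        // token-generation and prompt-processing timings accumulated above.
        out, _ := io.ReadAll(resp.Body)
        fmt.Println(string(out))
    }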