Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-28 14:35:00 +00:00)
feat: Add Get Token Metrics to GRPC server (#3687)
* Add Get Token Metrics to GRPC server
* Expose LocalAI endpoint

Signed-off-by: Siddharth More <siddimore@gmail.com>
parent 139209353f
commit f84b55d1ef

8 changed files with 180 additions and 0 deletions
core/backend/token_metrics.go (new file, 44 lines)

@@ -0,0 +1,44 @@
package backend

import (
	"context"
	"fmt"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/grpc/proto"
	model "github.com/mudler/LocalAI/pkg/model"
)

// TokenMetrics loads the requested backend/model pair and asks the running
// backend for its token throughput metrics over gRPC.
func TokenMetrics(
	backend,
	modelFile string,
	loader *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
	bb := backend
	if bb == "" {
		return nil, fmt.Errorf("backend is required")
	}

	grpcOpts := GRPCModelOpts(backendConfig)

	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
		model.WithBackendString(bb),
		model.WithModel(modelFile),
		model.WithContext(appConfig.Context),
		model.WithAssetDir(appConfig.AssetsDestination),
		model.WithLoadGRPCLoadModelOpts(grpcOpts),
	})

	// Load (or reuse) the gRPC backend serving this model.
	loadedModel, err := loader.BackendLoader(opts...)
	if err != nil {
		return nil, err
	}

	if loadedModel == nil {
		return nil, fmt.Errorf("could not load model")
	}

	res, err := loadedModel.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})

	return res, err
}
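For orientation, here is a minimal sketch of how this helper might be driven from elsewhere in the codebase. The loader, application config, and backend config are assumed to come from LocalAI's usual startup path; the function below is illustrative and not part of this commit.

package example

import (
	"fmt"

	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

// printTokenMetrics is a hypothetical caller: ml, appConfig, and cfg are
// assumed to be built by LocalAI's normal startup, not constructed here.
func printTokenMetrics(ml *model.ModelLoader, appConfig *config.ApplicationConfig, cfg config.BackendConfig) error {
	res, err := backend.TokenMetrics(cfg.Backend, cfg.Model, ml, appConfig, cfg)
	if err != nil {
		return err
	}
	fmt.Printf("token metrics: %+v\n", res)
	return nil
}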
core/http/endpoints/localai/get_token_metrics.go (new file, 60 lines)

@@ -0,0 +1,60 @@
package localai

import (
	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/backend"
	"github.com/mudler/LocalAI/core/config"
	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/rs/zerolog/log"

	"github.com/mudler/LocalAI/pkg/model"
)

// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for the active SlotID
//
// @Summary Get TokenMetrics for Active Slot.
// @Accept json
// @Produce json
// @Success 200 {object} proto.MetricsResponse "Token metrics for the active slot"
// @Router /v1/tokenMetrics [get]
// @Router /tokenMetrics [get]
func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {

		input := new(schema.TokenMetricsRequest)

		// Get input data from the request body
		if err := c.BodyParser(input); err != nil {
			return err
		}

		// Resolve the model name from the request context, falling back to
		// the name supplied in the body.
		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
		if err != nil {
			modelFile = input.Model
			log.Warn().Msgf("Model not found in context: %s", input.Model)
		}

		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
			config.LoadOptionDebug(appConfig.Debug),
			config.LoadOptionThreads(appConfig.Threads),
			config.LoadOptionContextSize(appConfig.ContextSize),
			config.LoadOptionF16(appConfig.F16),
		)

		if err != nil {
			log.Err(err)
			modelFile = input.Model
			log.Warn().Msgf("Model config not found: %s", input.Model)
		} else {
			modelFile = cfg.Model
		}
		log.Debug().Msgf("Token Metrics for model: %s", modelFile)

		response, err := backend.TokenMetrics(cfg.Backend, modelFile, ml, appConfig, *cfg)
		if err != nil {
			return err
		}
		return c.JSON(response)
	}
}
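Once a server with this change is running, the route can be exercised with a small standalone client. This is a sketch assuming a local instance on :8080 and a placeholder model name; note the handler reads a JSON body via BodyParser even though the route is registered as GET, so the body is attached to the GET request.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder address and model name; adjust for your deployment.
	body := bytes.NewBufferString(`{"model": "my-model"}`)
	req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/v1/tokenMetrics", body)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Print the status line and the JSON-encoded MetricsResponse.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}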
@@ -10,6 +10,10 @@ type BackendMonitorRequest struct {
	Model string `json:"model" yaml:"model"`
}

type TokenMetricsRequest struct {
	Model string `json:"model" yaml:"model"`
}

type BackendMonitorResponse struct {
	MemoryInfo    *gopsutil.MemoryInfoStat
	MemoryPercent float32
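On the wire the new request type carries only the model name. A self-contained round-trip sketch, with the struct copied from the hunk above (yaml tag dropped since only JSON is exercised here):

package main

import (
	"encoding/json"
	"fmt"
)

// TokenMetricsRequest mirrors the schema type added in this commit.
type TokenMetricsRequest struct {
	Model string `json:"model"`
}

func main() {
	b, _ := json.Marshal(TokenMetricsRequest{Model: "my-model"})
	fmt.Println(string(b)) // prints {"model":"my-model"}
}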