feat: move other backends to grpc

This finally makes everything more consistent.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-28 22:44:59 +00:00 · 2023-07-15 01:19:43 +02:00 · 2023-07-15 01:19:43 +02:00 · 1d0ed95a54
commit 1d0ed95a54
parent 5dcfdbe51d
54 changed files with 3171 additions and 1712 deletions
--- a/pkg/grpc/proto/llmserver.proto
+++ b/pkg/grpc/proto/llmserver.proto
@@ -1,90 +0,0 @@
-syntax = "proto3";
-
-option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
-option java_multiple_files = true;
-option java_package = "io.skynet.localai.llmserver";
-option java_outer_classname = "LLMServer";
-
-package llm;
-
-service LLM {
-  rpc Health(HealthMessage) returns (Reply) {}
-  rpc Predict(PredictOptions) returns (Reply) {}
-  rpc LoadModel(ModelOptions) returns (Result) {}
-  rpc PredictStream(PredictOptions) returns (stream Reply) {}
-  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
-}
-
-message HealthMessage {}
-
-// The request message containing the user's name.
-message PredictOptions {
-  string Prompt = 1;
-  int32 Seed = 2;
-  int32 Threads = 3;
-  int32 Tokens = 4;
-  int32 TopK = 5;
-  int32 Repeat = 6;
-  int32 Batch = 7;
-  int32 NKeep = 8;
-  float Temperature = 9;
-  float Penalty = 10;
-  bool F16KV = 11;
-  bool DebugMode = 12;
-  repeated string StopPrompts = 13;
-  bool IgnoreEOS = 14;
-  float TailFreeSamplingZ = 15;
-  float TypicalP = 16;
-  float FrequencyPenalty = 17;
-  float PresencePenalty = 18;
-  int32 Mirostat = 19;
-  float MirostatETA = 20;
-  float MirostatTAU = 21;
-  bool PenalizeNL = 22;
-  string LogitBias = 23;
-  bool MLock = 25;
-  bool MMap = 26;
-  bool PromptCacheAll = 27;
-  bool PromptCacheRO = 28;
-  string Grammar = 29;
-  string MainGPU = 30;
-  string TensorSplit = 31;
-  float TopP = 32;
-  string PromptCachePath = 33;
-  bool Debug = 34;
-  repeated int32 EmbeddingTokens = 35;
-  string Embeddings = 36;
-}
-
-// The response message containing the result
-message Reply {
-  string message = 1;
-}
-
-message ModelOptions {
-  string Model = 1;
-  int32 ContextSize = 2;
-  int32 Seed = 3;
-  int32 NBatch = 4;
-  bool F16Memory = 5;
-  bool MLock = 6;
-  bool MMap = 7;
-  bool VocabOnly = 8;
-  bool LowVRAM = 9;
-  bool Embeddings = 10;
-  bool NUMA = 11;
-  int32 NGPULayers = 12;
-  string MainGPU = 13;
-  string TensorSplit = 14;
-  int32 Threads = 15;
-  string LibrarySearchPath = 16;
-}
-
-message Result {
-  string message = 1;
-  bool success = 2;
-}
-
-message EmbeddingResult {
-  repeated float embeddings = 1;
-}