refactor: move backends into the backends directory (#1279)

* refactor: move backends into the backends directory Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactor: move main close to implementation for every backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:35:01 +00:00 · 2023-11-13 22:40:16 +01:00 · 2023-11-13 22:40:16 +01:00 · ad0e30bca5
commit ad0e30bca5
parent 55461188a4
102 changed files with 156 additions and 190 deletions
--- a/pkg/grpc/proto/backend.proto
+++ b/pkg/grpc/proto/backend.proto
@ -1,208 +0,0 @@
-syntax = "proto3";
-
-option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
-option java_multiple_files = true;
-option java_package = "io.skynet.localai.backend";
-option java_outer_classname = "LocalAIBackend";
-
-package backend;
-
-service Backend { // gRPC service of the `backend` package; every method is declared with an empty option body `{}`.
-  rpc Health(HealthMessage) returns (Reply) {} // unary; request is the field-less HealthMessage
-  rpc Predict(PredictOptions) returns (Reply) {} // unary; a single Reply is returned per call
-  rpc LoadModel(ModelOptions) returns (Result) {} // unary; Result carries a message string and a success flag
-  rpc PredictStream(PredictOptions) returns (stream Reply) {} // server-streaming: same request type as Predict, but a stream of Reply messages
-  rpc Embedding(PredictOptions) returns (EmbeddingResult) {} // unary; reuses PredictOptions as its request type
-  rpc GenerateImage(GenerateImageRequest) returns (Result) {} // unary; returns only the generic Result
-  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {} // unary
-  rpc TTS(TTSRequest) returns (Result) {} // unary; returns only the generic Result
-  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {} // unary; reuses PredictOptions as its request type
-  rpc Status(HealthMessage) returns (StatusResponse) {} // unary; shares the field-less HealthMessage request with Health
-}
-
-message HealthMessage {} // Field-less request message used by both the Health and Status RPCs.
-
-// Request message accepted by the Predict, PredictStream, Embedding and TokenizeString RPCs.
-message PredictOptions {
-  string Prompt = 1;
-  int32 Seed = 2;
-  int32 Threads = 3;
-  int32 Tokens = 4;
-  int32 TopK = 5;
-  int32 Repeat = 6;
-  int32 Batch = 7;
-  int32 NKeep = 8;
-  float Temperature = 9;
-  float Penalty = 10;
-  bool F16KV = 11;
-  bool DebugMode = 12; // a separate `Debug` flag also exists as field 34
-  repeated string StopPrompts = 13;
-  bool IgnoreEOS = 14;
-  float TailFreeSamplingZ = 15; // last field number that fits the 1-byte tag range (1-15)
-  float TypicalP = 16;
-  float FrequencyPenalty = 17;
-  float PresencePenalty = 18;
-  int32 Mirostat = 19;
-  float MirostatETA = 20;
-  float MirostatTAU = 21;
-  bool PenalizeNL = 22;
-  string LogitBias = 23; // carried as a plain string; its format is not defined in this file
-  bool MLock = 25; // field number 24 is not assigned anywhere in this message
-  bool MMap = 26;
-  bool PromptCacheAll = 27;
-  bool PromptCacheRO = 28;
-  string Grammar = 29;
-  string MainGPU = 30;
-  string TensorSplit = 31; // carried as a plain string; its format is not defined in this file
-  float TopP = 32;
-  string PromptCachePath = 33;
-  bool Debug = 34; // distinct from DebugMode (12); this file does not say how the two differ
-  repeated int32 EmbeddingTokens = 35;
-  string Embeddings = 36; // a string here, whereas ModelOptions.Embeddings is a bool
-  float RopeFreqBase = 37;
-  float RopeFreqScale = 38;
-  float NegativePromptScale = 39;
-  string NegativePrompt = 40;
-  int32 NDraft = 41;
-  repeated string Images = 42; // encoding of each entry is not stated here — TODO confirm against the backends
-}
-
-// Response message returned by the Health and Predict RPCs and streamed by PredictStream.
-message Reply {
-  bytes message = 1; // `bytes`, not `string`, so the payload is not required to be valid UTF-8
-}
-
-message ModelOptions { // Request message of the LoadModel RPC; field numbers 1-47 are all assigned, but not declared in numeric order.
-  string Model = 1;
-  int32 ContextSize = 2;
-  int32 Seed = 3;
-  int32 NBatch = 4;
-  bool F16Memory = 5;
-  bool MLock = 6;
-  bool MMap = 7;
-  bool VocabOnly = 8;
-  bool LowVRAM = 9;
-  bool Embeddings = 10; // a bool here, whereas PredictOptions.Embeddings is a string
-  bool NUMA = 11;
-  int32 NGPULayers = 12;
-  string MainGPU = 13;
-  string TensorSplit = 14;
-  int32 Threads = 15;
-  string LibrarySearchPath = 16;
-  float RopeFreqBase = 17;
-  float RopeFreqScale = 18;
-  float RMSNormEps = 19;
-  int32 NGQA = 20;
-  string ModelFile = 21; // separate from `Model` (1); this file does not say how the two differ
-
-  // AutoGPTQ
-  string Device = 22;
-  bool UseTriton = 23;
-  string ModelBaseName = 24;
-  bool UseFastTokenizer = 25;
-
-  // Diffusers
-  string PipelineType = 26;
-  string SchedulerType = 27;
-  bool CUDA = 28;
-  float CFGScale = 29;
-  bool IMG2IMG = 30;
-  string CLIPModel = 31;
-  string CLIPSubfolder = 32;
-  int32 CLIPSkip = 33;
-
-  // RWKV
-  string Tokenizer = 34;
-
-  // LLM (llama.cpp)
-  string LoraBase = 35;
-  string LoraAdapter = 36;
-  float LoraScale = 42; // number 42 is declared here, between 36 and 37, next to the other Lora fields
-
-  bool NoMulMatQ = 37;
-  string DraftModel = 39; // declared before 38
-  
-  string AudioPath = 38;
-
-  // vllm
-  string Quantization = 40;
-
-  string MMProj = 41; // not under any backend heading; which backend reads it is not stated here
-
-  string RopeScaling = 43; // a string-valued selector; accepted values are not defined in this file
-  float YarnExtFactor = 44;
-  float YarnAttnFactor = 45;
-  float YarnBetaFast = 46;
-  float YarnBetaSlow = 47; // highest field number in this message
-}
-
-message Result { // Generic outcome message returned by the LoadModel, GenerateImage and TTS RPCs.
-  string message = 1;
-  bool success = 2; // proto3 implicit presence: an unset flag reads as false
-}
-
-message EmbeddingResult { // Response message of the Embedding RPC.
-  repeated float embeddings = 1; // repeated numeric scalar, so packed on the wire by default in proto3
-}
-
-message TranscriptRequest { // Request message of the AudioTranscription RPC.
-  string dst = 2; // numbering starts at 2; field number 1 is not assigned in this message
-  string language = 3;
-  uint32 threads = 4; // unsigned here, whereas PredictOptions.Threads and ModelOptions.Threads are int32
-}
-
-message TranscriptResult { // Response message of the AudioTranscription RPC.
-  repeated TranscriptSegment segments = 1;
-  string text = 2; // top-level text, in addition to the per-segment `text` fields
-}
-
-message TranscriptSegment { // Element type of TranscriptResult.segments.
-  int32 id = 1;
-  int64 start = 2; // unit is not stated in this file — TODO confirm with the producing backend
-  int64 end = 3; // same unspecified unit as `start`
-  string text = 4;
-  repeated int32 tokens = 5; // repeated numeric scalar, so packed on the wire by default in proto3
-}
-
-message GenerateImageRequest { // Request message of the GenerateImage RPC; the only message here that mixes snake_case and PascalCase field names.
-  int32 height = 1;
-  int32 width = 2;
-  int32 mode = 3; // integer selector; its valid values are not defined in this file
-  int32 step = 4;
-  int32 seed = 5;
-  string positive_prompt = 6;
-  string negative_prompt = 7;
-  string dst = 8;
-  string src = 9;
-
-  // Diffusers
-  string EnableParameters = 10; // carried as a plain string; its format is not defined in this file
-  int32 CLIPSkip = 11; // same name and type as ModelOptions.CLIPSkip (33)
-}
-
-message TTSRequest { // Request message of the TTS RPC, which responds with the generic Result.
-  string text = 1;
-  string model = 2;
-  string dst = 3; // presumably an output location, since Result carries no audio payload — TODO confirm
-}
-
-message TokenizationResponse { // Response message of the TokenizeString RPC.
-  int32 length = 1; // presumably the number of entries in `tokens` — TODO confirm
-  repeated int32 tokens = 2; // repeated numeric scalar, so packed on the wire by default in proto3
-}
-
-message MemoryUsageData { // Referenced in this file only as the type of StatusResponse.memory.
-  uint64 total = 1; // unit is not stated in this file — presumably bytes, TODO confirm
-  map<string, uint64> breakdown = 2; // map order is undefined on the wire; what the string keys identify is not stated here
-}
-
-message StatusResponse { // Response message of the Status RPC.
-  enum State { // nested enum; value names are not prefixed with the enum type name
-    UNINITIALIZED = 0; // zero value, so it is what `state` reads as when never set
-    BUSY = 1;
-    READY = 2;
-    ERROR = -1; // the only negative value; a negative enum number is varint-encoded in 10 bytes
-  }
-  State state = 1;
-  MemoryUsageData memory = 2; // message-typed field, so it has explicit presence and may be unset
-}