mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
feat: add initial AutoGPTQ backend implementation
This commit is contained in:
parent
91d49cfe9f
commit
a843e64fc2
37 changed files with 660 additions and 148 deletions
3
Makefile
3
Makefile
|
@ -335,7 +335,8 @@ protogen-go:
|
||||||
pkg/grpc/proto/backend.proto
|
pkg/grpc/proto/backend.proto
|
||||||
|
|
||||||
protogen-python:
|
protogen-python:
|
||||||
python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
|
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
|
||||||
|
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
|
||||||
|
|
||||||
## GRPC
|
## GRPC
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
||||||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||||
model.WithThreads(uint32(c.Threads)),
|
model.WithThreads(uint32(c.Threads)),
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
model.WithModelFile(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
model.WithThreads(uint32(c.Threads)),
|
model.WithThreads(uint32(c.Threads)),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
model.WithModelFile(c.ImageGenerationAssets),
|
model.WithModel(c.ImageGenerationAssets),
|
||||||
}
|
}
|
||||||
|
|
||||||
for k, v := range o.ExternalGRPCBackends {
|
for k, v := range o.ExternalGRPCBackends {
|
||||||
|
|
|
@ -27,7 +27,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
||||||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||||
model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
|
model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
model.WithModelFile(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,9 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
||||||
Seed: int32(c.Seed),
|
Seed: int32(c.Seed),
|
||||||
NBatch: int32(b),
|
NBatch: int32(b),
|
||||||
NGQA: c.NGQA,
|
NGQA: c.NGQA,
|
||||||
|
ModelBaseName: c.ModelBaseName,
|
||||||
|
Device: c.Device,
|
||||||
|
UseTriton: c.Triton,
|
||||||
RMSNormEps: c.RMSNormEps,
|
RMSNormEps: c.RMSNormEps,
|
||||||
F16Memory: c.F16,
|
F16Memory: c.F16,
|
||||||
MLock: c.MMlock,
|
MLock: c.MMlock,
|
||||||
|
|
|
@ -15,7 +15,7 @@ import (
|
||||||
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
|
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
|
||||||
opts := []model.Option{
|
opts := []model.Option{
|
||||||
model.WithBackendString(model.WhisperBackend),
|
model.WithBackendString(model.WhisperBackend),
|
||||||
model.WithModelFile(c.Model),
|
model.WithModel(c.Model),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
model.WithThreads(uint32(c.Threads)),
|
model.WithThreads(uint32(c.Threads)),
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
|
|
|
@ -54,6 +54,11 @@ type Config struct {
|
||||||
|
|
||||||
RMSNormEps float32 `yaml:"rms_norm_eps"`
|
RMSNormEps float32 `yaml:"rms_norm_eps"`
|
||||||
NGQA int32 `yaml:"ngqa"`
|
NGQA int32 `yaml:"ngqa"`
|
||||||
|
|
||||||
|
// AutoGPTQ
|
||||||
|
ModelBaseName string `yaml:"model_base_name"`
|
||||||
|
Device string `yaml:"device"`
|
||||||
|
Triton bool `yaml:"triton"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Functions struct {
|
type Functions struct {
|
||||||
|
|
|
@ -2,6 +2,7 @@ package openai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||||
|
@ -106,4 +107,9 @@ type OpenAIRequest struct {
|
||||||
Grammar string `json:"grammar" yaml:"grammar"`
|
Grammar string `json:"grammar" yaml:"grammar"`
|
||||||
|
|
||||||
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
|
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
|
||||||
|
|
||||||
|
Backend string `json:"backend" yaml:"backend"`
|
||||||
|
|
||||||
|
// AutoGPTQ
|
||||||
|
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,14 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
|
||||||
config.TopP = input.TopP
|
config.TopP = input.TopP
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.Backend != "" {
|
||||||
|
config.Backend = input.Backend
|
||||||
|
}
|
||||||
|
|
||||||
|
if input.ModelBaseName != "" {
|
||||||
|
config.ModelBaseName = input.ModelBaseName
|
||||||
|
}
|
||||||
|
|
||||||
if input.NegativePromptScale != 0 {
|
if input.NegativePromptScale != 0 {
|
||||||
config.NegativePromptScale = input.NegativePromptScale
|
config.NegativePromptScale = input.NegativePromptScale
|
||||||
}
|
}
|
||||||
|
|
94
extra/grpc/autogptq/autogptq.py
Executable file
94
extra/grpc/autogptq/autogptq.py
Executable file
|
@ -0,0 +1,94 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import grpc
|
||||||
|
from concurrent import futures
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
import argparse
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
|
||||||
|
from pathlib import Path
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
from transformers import TextGenerationPipeline
|
||||||
|
|
||||||
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
|
# Implement the BackendServicer class with the service methods
|
||||||
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that generates text with an AutoGPTQ-quantized model.

    ``LoadModel`` is what sets ``self.model`` and ``self.tokenizer``; both
    ``Predict`` and ``PredictStream`` read those attributes, so a model has to
    be loaded successfully before either of them is called.
    """

    def Health(self, request, context):
        """Liveness probe: always replies with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load the tokenizer and the quantized model named by ``request.Model``.

        Reads ``Model``, ``ModelBaseName``, ``Device`` and ``UseTriton`` from
        the request. ``Device`` falls back to ``cuda:0`` when it is empty.

        Returns a ``Result`` with ``success=True`` once both objects are stored
        on the servicer, or ``success=False`` and the error text if anything
        raised while loading.
        """
        try:
            device = request.Device if request.Device != "" else "cuda:0"

            tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=True)

            model = AutoGPTQForCausalLM.from_quantized(
                request.Model,
                model_basename=request.ModelBaseName,
                use_safetensors=True,
                trust_remote_code=True,
                device=device,
                use_triton=request.UseTriton,
                quantize_config=None,
            )

            # Only publish the pair once both loads succeeded, so a failed
            # load never leaves a model without its tokenizer (or vice versa).
            self.model = model
            self.tokenizer = tokenizer
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """Generate a completion for ``request.Prompt``.

        Returns a ``Reply`` whose ``message`` is the UTF-8 encoded
        ``generated_text`` of the first pipeline result.
        """
        # proto3 scalar fields read as 0 when the caller did not set them.
        # Forward only the settings that carry a value so the pipeline falls
        # back to its own defaults instead of receiving a literal 0.
        generation_args = {}
        if request.Tokens > 0:
            generation_args["max_new_tokens"] = request.Tokens
        if request.Temperature > 0:
            generation_args["temperature"] = request.Temperature
        if request.TopP > 0:
            generation_args["top_p"] = request.TopP
        if request.Penalty > 0:
            generation_args["repetition_penalty"] = request.Penalty

        pipeline = TextGenerationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            **generation_args,
        )
        generated = pipeline(request.Prompt)[0]["generated_text"]

        # bytes() needs an explicit encoding for a str argument, and the
        # Predict RPC is registered with Reply (bytes message) as its
        # response type, not Result.
        return backend_pb2.Reply(message=bytes(generated, encoding='utf-8'))

    def PredictStream(self, request, context):
        """Stream the completion for ``request.Prompt``.

        Token-by-token streaming is not implemented yet: the whole completion
        is produced by ``Predict`` and emitted as a single streamed ``Reply``.
        """
        # A response-streaming handler has to hand gRPC an iterator of
        # messages, so yield the one reply rather than returning it.
        yield self.Predict(request, context)
|
||||||
|
|
||||||
|
|
||||||
|
def serve(address):
    """Start the gRPC backend on ``address`` and block until interrupted.

    The server runs on a 10-worker thread pool over an insecure
    (plaintext) port. SIGINT and SIGTERM both stop the server immediately
    and exit the process with status 0.
    """
    worker_pool = futures.ThreadPoolExecutor(max_workers=10)
    server = grpc.server(worker_pool)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(signum, stack):
        # Stop with no grace period, then terminate the interpreter.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Same handler for both termination signals.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # server.start() does not block, so keep the main thread alive.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Read the bind address from the command line, then serve on it.
    arg_parser = argparse.ArgumentParser(description="Run the gRPC server.")
    arg_parser.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    cli_args = arg_parser.parse_args()

    serve(cli_args.addr)
|
49
extra/grpc/autogptq/backend_pb2.py
Normal file
49
extra/grpc/autogptq/backend_pb2.py
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||||
|
# source: backend.proto
|
||||||
|
"""Generated protocol buffer code."""
|
||||||
|
from google.protobuf import descriptor as _descriptor
|
||||||
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||||
|
from google.protobuf import symbol_database as _symbol_database
|
||||||
|
from google.protobuf.internal import builder as _builder
|
||||||
|
# @@protoc_insertion_point(imports)
|
||||||
|
|
||||||
|
_sym_db = _symbol_database.Default()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xc8\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 
\x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
|
||||||
|
|
||||||
|
_globals = globals()
|
||||||
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||||
|
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
|
||||||
|
if _descriptor._USE_C_DESCRIPTORS == False:
|
||||||
|
|
||||||
|
DESCRIPTOR._options = None
|
||||||
|
DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
|
||||||
|
_globals['_HEALTHMESSAGE']._serialized_start=26
|
||||||
|
_globals['_HEALTHMESSAGE']._serialized_end=41
|
||||||
|
_globals['_PREDICTOPTIONS']._serialized_start=44
|
||||||
|
_globals['_PREDICTOPTIONS']._serialized_end=818
|
||||||
|
_globals['_REPLY']._serialized_start=820
|
||||||
|
_globals['_REPLY']._serialized_end=844
|
||||||
|
_globals['_MODELOPTIONS']._serialized_start=847
|
||||||
|
_globals['_MODELOPTIONS']._serialized_end=1303
|
||||||
|
_globals['_RESULT']._serialized_start=1305
|
||||||
|
_globals['_RESULT']._serialized_end=1347
|
||||||
|
_globals['_EMBEDDINGRESULT']._serialized_start=1349
|
||||||
|
_globals['_EMBEDDINGRESULT']._serialized_end=1386
|
||||||
|
_globals['_TRANSCRIPTREQUEST']._serialized_start=1388
|
||||||
|
_globals['_TRANSCRIPTREQUEST']._serialized_end=1455
|
||||||
|
_globals['_TRANSCRIPTRESULT']._serialized_start=1457
|
||||||
|
_globals['_TRANSCRIPTRESULT']._serialized_end=1535
|
||||||
|
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1537
|
||||||
|
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1626
|
||||||
|
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1629
|
||||||
|
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1787
|
||||||
|
_globals['_TTSREQUEST']._serialized_start=1789
|
||||||
|
_globals['_TTSREQUEST']._serialized_end=1843
|
||||||
|
_globals['_BACKEND']._serialized_start=1846
|
||||||
|
_globals['_BACKEND']._serialized_end=2337
|
||||||
|
# @@protoc_insertion_point(module_scope)
|
297
extra/grpc/autogptq/backend_pb2_grpc.py
Normal file
297
extra/grpc/autogptq/backend_pb2_grpc.py
Normal file
|
@ -0,0 +1,297 @@
|
||||||
|
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||||
|
"""Client and server classes corresponding to protobuf-defined services."""
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
import backend_pb2 as backend__pb2
|
||||||
|
|
||||||
|
|
||||||
|
class BackendStub(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def __init__(self, channel):
|
||||||
|
"""Constructor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
channel: A grpc.Channel.
|
||||||
|
"""
|
||||||
|
self.Health = channel.unary_unary(
|
||||||
|
'/backend.Backend/Health',
|
||||||
|
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Reply.FromString,
|
||||||
|
)
|
||||||
|
self.Predict = channel.unary_unary(
|
||||||
|
'/backend.Backend/Predict',
|
||||||
|
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Reply.FromString,
|
||||||
|
)
|
||||||
|
self.LoadModel = channel.unary_unary(
|
||||||
|
'/backend.Backend/LoadModel',
|
||||||
|
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Result.FromString,
|
||||||
|
)
|
||||||
|
self.PredictStream = channel.unary_stream(
|
||||||
|
'/backend.Backend/PredictStream',
|
||||||
|
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Reply.FromString,
|
||||||
|
)
|
||||||
|
self.Embedding = channel.unary_unary(
|
||||||
|
'/backend.Backend/Embedding',
|
||||||
|
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
||||||
|
)
|
||||||
|
self.GenerateImage = channel.unary_unary(
|
||||||
|
'/backend.Backend/GenerateImage',
|
||||||
|
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Result.FromString,
|
||||||
|
)
|
||||||
|
self.AudioTranscription = channel.unary_unary(
|
||||||
|
'/backend.Backend/AudioTranscription',
|
||||||
|
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
||||||
|
)
|
||||||
|
self.TTS = channel.unary_unary(
|
||||||
|
'/backend.Backend/TTS',
|
||||||
|
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
||||||
|
response_deserializer=backend__pb2.Result.FromString,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BackendServicer(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def Health(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def Predict(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def LoadModel(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def PredictStream(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def Embedding(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def GenerateImage(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def AudioTranscription(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
def TTS(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
|
||||||
|
def add_BackendServicer_to_server(servicer, server):
|
||||||
|
rpc_method_handlers = {
|
||||||
|
'Health': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.Health,
|
||||||
|
request_deserializer=backend__pb2.HealthMessage.FromString,
|
||||||
|
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||||
|
),
|
||||||
|
'Predict': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.Predict,
|
||||||
|
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||||
|
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||||
|
),
|
||||||
|
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.LoadModel,
|
||||||
|
request_deserializer=backend__pb2.ModelOptions.FromString,
|
||||||
|
response_serializer=backend__pb2.Result.SerializeToString,
|
||||||
|
),
|
||||||
|
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
||||||
|
servicer.PredictStream,
|
||||||
|
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||||
|
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||||
|
),
|
||||||
|
'Embedding': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.Embedding,
|
||||||
|
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||||
|
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
||||||
|
),
|
||||||
|
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.GenerateImage,
|
||||||
|
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
||||||
|
response_serializer=backend__pb2.Result.SerializeToString,
|
||||||
|
),
|
||||||
|
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.AudioTranscription,
|
||||||
|
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
||||||
|
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
||||||
|
),
|
||||||
|
'TTS': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.TTS,
|
||||||
|
request_deserializer=backend__pb2.TTSRequest.FromString,
|
||||||
|
response_serializer=backend__pb2.Result.SerializeToString,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
generic_handler = grpc.method_handlers_generic_handler(
|
||||||
|
'backend.Backend', rpc_method_handlers)
|
||||||
|
server.add_generic_rpc_handlers((generic_handler,))
|
||||||
|
|
||||||
|
|
||||||
|
# This class is part of an EXPERIMENTAL API.
|
||||||
|
class Backend(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def Health(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
||||||
|
backend__pb2.HealthMessage.SerializeToString,
|
||||||
|
backend__pb2.Reply.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def Predict(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
||||||
|
backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
backend__pb2.Reply.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def LoadModel(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
||||||
|
backend__pb2.ModelOptions.SerializeToString,
|
||||||
|
backend__pb2.Result.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def PredictStream(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
||||||
|
backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
backend__pb2.Reply.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def Embedding(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
||||||
|
backend__pb2.PredictOptions.SerializeToString,
|
||||||
|
backend__pb2.EmbeddingResult.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def GenerateImage(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
||||||
|
backend__pb2.GenerateImageRequest.SerializeToString,
|
||||||
|
backend__pb2.Result.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def AudioTranscription(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
||||||
|
backend__pb2.TranscriptRequest.SerializeToString,
|
||||||
|
backend__pb2.TranscriptResult.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def TTS(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
||||||
|
backend__pb2.TTSRequest.SerializeToString,
|
||||||
|
backend__pb2.Result.FromString,
|
||||||
|
options, channel_credentials,
|
||||||
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
@ -13,7 +13,7 @@ _sym_db = _symbol_database.Default()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xfb\x02\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 
\x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xc8\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 
\x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
|
||||||
|
|
||||||
_globals = globals()
|
_globals = globals()
|
||||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||||
|
@ -29,21 +29,21 @@ if _descriptor._USE_C_DESCRIPTORS == False:
|
||||||
_globals['_REPLY']._serialized_start=820
|
_globals['_REPLY']._serialized_start=820
|
||||||
_globals['_REPLY']._serialized_end=844
|
_globals['_REPLY']._serialized_end=844
|
||||||
_globals['_MODELOPTIONS']._serialized_start=847
|
_globals['_MODELOPTIONS']._serialized_start=847
|
||||||
_globals['_MODELOPTIONS']._serialized_end=1226
|
_globals['_MODELOPTIONS']._serialized_end=1303
|
||||||
_globals['_RESULT']._serialized_start=1228
|
_globals['_RESULT']._serialized_start=1305
|
||||||
_globals['_RESULT']._serialized_end=1270
|
_globals['_RESULT']._serialized_end=1347
|
||||||
_globals['_EMBEDDINGRESULT']._serialized_start=1272
|
_globals['_EMBEDDINGRESULT']._serialized_start=1349
|
||||||
_globals['_EMBEDDINGRESULT']._serialized_end=1309
|
_globals['_EMBEDDINGRESULT']._serialized_end=1386
|
||||||
_globals['_TRANSCRIPTREQUEST']._serialized_start=1311
|
_globals['_TRANSCRIPTREQUEST']._serialized_start=1388
|
||||||
_globals['_TRANSCRIPTREQUEST']._serialized_end=1378
|
_globals['_TRANSCRIPTREQUEST']._serialized_end=1455
|
||||||
_globals['_TRANSCRIPTRESULT']._serialized_start=1380
|
_globals['_TRANSCRIPTRESULT']._serialized_start=1457
|
||||||
_globals['_TRANSCRIPTRESULT']._serialized_end=1458
|
_globals['_TRANSCRIPTRESULT']._serialized_end=1535
|
||||||
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1460
|
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1537
|
||||||
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1549
|
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1626
|
||||||
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1552
|
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1629
|
||||||
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1710
|
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1787
|
||||||
_globals['_TTSREQUEST']._serialized_start=1712
|
_globals['_TTSREQUEST']._serialized_start=1789
|
||||||
_globals['_TTSREQUEST']._serialized_end=1766
|
_globals['_TTSREQUEST']._serialized_end=1843
|
||||||
_globals['_BACKEND']._serialized_start=1769
|
_globals['_BACKEND']._serialized_start=1846
|
||||||
_globals['_BACKEND']._serialized_end=2260
|
_globals['_BACKEND']._serialized_end=2337
|
||||||
# @@protoc_insertion_point(module_scope)
|
# @@protoc_insertion_point(module_scope)
|
||||||
|
|
|
@ -18,7 +18,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||||
def LoadModel(self, request, context):
|
def LoadModel(self, request, context):
|
||||||
model_name = request.Model
|
model_name = request.Model
|
||||||
model_name = os.path.basename(model_name)
|
|
||||||
try:
|
try:
|
||||||
self.model = SentenceTransformer(model_name)
|
self.model = SentenceTransformer(model_name)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
sentence_transformers
|
sentence_transformers
|
||||||
grpcio
|
grpcio
|
||||||
google
|
google
|
||||||
protobuf
|
protobuf
|
||||||
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
@ -16,7 +16,7 @@ type StableDiffusion struct {
|
||||||
func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
|
func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
|
||||||
var err error
|
var err error
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
sd.stablediffusion, err = stablediffusion.New(opts.Model)
|
sd.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ type Embeddings struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := bert.New(opts.Model)
|
model, err := bert.New(opts.ModelFile)
|
||||||
llm.bert = model
|
llm.bert = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type LLM struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := bloomz.New(opts.Model)
|
model, err := bloomz.New(opts.ModelFile)
|
||||||
llm.bloomz = model
|
llm.bloomz = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
|
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := ggllm.New(opts.Model, ggllmOpts...)
|
model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
|
||||||
llm.falcon = model
|
llm.falcon = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ type LLM struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := gpt4all.New(opts.Model,
|
model, err := gpt4all.New(opts.ModelFile,
|
||||||
gpt4all.SetThreads(int(opts.Threads)),
|
gpt4all.SetThreads(int(opts.Threads)),
|
||||||
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
||||||
llm.gpt4all = model
|
llm.gpt4all = model
|
||||||
|
|
|
@ -71,7 +71,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := llama.New(opts.Model, llamaOpts...)
|
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
||||||
llm.llama = model
|
llm.llama = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,9 @@ type LLM struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
modelPath := filepath.Dir(opts.Model)
|
modelPath := filepath.Dir(opts.ModelFile)
|
||||||
modelFile := filepath.Base(opts.Model)
|
modelFile := filepath.Base(opts.ModelFile)
|
||||||
model := rwkv.LoadFiles(opts.Model, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
|
model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
|
||||||
|
|
||||||
if model == nil {
|
if model == nil {
|
||||||
return fmt.Errorf("could not load model")
|
return fmt.Errorf("could not load model")
|
||||||
|
|
|
@ -18,7 +18,7 @@ type Dolly struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *Dolly) Load(opts *pb.ModelOptions) error {
|
func (llm *Dolly) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewDolly(opts.Model)
|
model, err := transformers.NewDolly(opts.ModelFile)
|
||||||
llm.dolly = model
|
llm.dolly = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type Falcon struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
|
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewFalcon(opts.Model)
|
model, err := transformers.NewFalcon(opts.ModelFile)
|
||||||
llm.falcon = model
|
llm.falcon = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type GPT2 struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *GPT2) Load(opts *pb.ModelOptions) error {
|
func (llm *GPT2) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.New(opts.Model)
|
model, err := transformers.New(opts.ModelFile)
|
||||||
llm.gpt2 = model
|
llm.gpt2 = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type GPTJ struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
|
func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewGPTJ(opts.Model)
|
model, err := transformers.NewGPTJ(opts.ModelFile)
|
||||||
llm.gptj = model
|
llm.gptj = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type GPTNeoX struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
|
func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewGPTNeoX(opts.Model)
|
model, err := transformers.NewGPTNeoX(opts.ModelFile)
|
||||||
llm.gptneox = model
|
llm.gptneox = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type MPT struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *MPT) Load(opts *pb.ModelOptions) error {
|
func (llm *MPT) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewMPT(opts.Model)
|
model, err := transformers.NewMPT(opts.ModelFile)
|
||||||
llm.mpt = model
|
llm.mpt = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type Replit struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *Replit) Load(opts *pb.ModelOptions) error {
|
func (llm *Replit) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewReplit(opts.Model)
|
model, err := transformers.NewReplit(opts.ModelFile)
|
||||||
llm.replit = model
|
llm.replit = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ type Starcoder struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
|
func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
|
||||||
model, err := transformers.NewStarcoder(opts.Model)
|
model, err := transformers.NewStarcoder(opts.ModelFile)
|
||||||
llm.starcoder = model
|
llm.starcoder = model
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -483,6 +483,11 @@ type ModelOptions struct {
|
||||||
RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
|
RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
|
||||||
RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"`
|
RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"`
|
||||||
NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"`
|
NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"`
|
||||||
|
ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"`
|
||||||
|
// AutoGPTQ
|
||||||
|
Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"`
|
||||||
|
UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"`
|
||||||
|
ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *ModelOptions) Reset() {
|
func (x *ModelOptions) Reset() {
|
||||||
|
@ -657,6 +662,34 @@ func (x *ModelOptions) GetNGQA() int32 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (x *ModelOptions) GetModelFile() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.ModelFile
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *ModelOptions) GetDevice() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Device
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *ModelOptions) GetUseTriton() bool {
|
||||||
|
if x != nil {
|
||||||
|
return x.UseTriton
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *ModelOptions) GetModelBaseName() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.ModelBaseName
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
type Result struct {
|
type Result struct {
|
||||||
state protoimpl.MessageState
|
state protoimpl.MessageState
|
||||||
sizeCache protoimpl.SizeCache
|
sizeCache protoimpl.SizeCache
|
||||||
|
@ -1207,7 +1240,7 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
|
||||||
0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22,
|
0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22,
|
||||||
0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
|
0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
|
||||||
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
|
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
|
||||||
0x67, 0x65, 0x22, 0xc8, 0x04, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69,
|
0x67, 0x65, 0x22, 0xc2, 0x05, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69,
|
||||||
0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01,
|
0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01,
|
||||||
0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e,
|
0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e,
|
||||||
0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b,
|
0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b,
|
||||||
|
@ -1243,90 +1276,98 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
|
||||||
0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53,
|
0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53,
|
||||||
0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52,
|
0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52,
|
||||||
0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51,
|
0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51,
|
||||||
0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x22, 0x3c, 0x0a,
|
0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a,
|
||||||
0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
|
0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09,
|
||||||
0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67,
|
0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44,
|
||||||
0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01,
|
0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76,
|
||||||
0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45,
|
0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e,
|
||||||
0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e,
|
0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f,
|
||||||
0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03,
|
0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61,
|
||||||
0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b,
|
0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42,
|
||||||
0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75,
|
0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c,
|
||||||
0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
|
0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01,
|
||||||
0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67,
|
0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73,
|
||||||
0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67,
|
0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75,
|
||||||
0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01,
|
0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69,
|
||||||
0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54,
|
0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65,
|
||||||
0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12,
|
0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d,
|
||||||
0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28,
|
0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e,
|
||||||
0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e,
|
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a,
|
||||||
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73,
|
0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12,
|
||||||
0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18,
|
0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28,
|
||||||
0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54,
|
0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74,
|
||||||
0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
|
0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68,
|
||||||
0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64,
|
0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72,
|
||||||
0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52,
|
0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67,
|
||||||
0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20,
|
0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61,
|
||||||
0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74,
|
|
||||||
0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06,
|
|
||||||
0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f,
|
|
||||||
0x6b, 0x65, 0x6e, 0x73, 0x22, 0xe4, 0x01, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74,
|
|
||||||
0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a,
|
|
||||||
0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68,
|
|
||||||
0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02,
|
|
||||||
0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d,
|
|
||||||
0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12,
|
|
||||||
0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73,
|
|
||||||
0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28,
|
|
||||||
0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74,
|
|
||||||
0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09,
|
|
||||||
0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
|
||||||
0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f,
|
|
||||||
0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74,
|
|
||||||
0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74,
|
|
||||||
0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x48, 0x0a, 0x0a, 0x54,
|
|
||||||
0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78,
|
|
||||||
0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a,
|
|
||||||
0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f,
|
|
||||||
0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09,
|
|
||||||
0x52, 0x03, 0x64, 0x73, 0x74, 0x32, 0xeb, 0x03, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e,
|
|
||||||
0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61,
|
|
||||||
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73,
|
|
||||||
0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
|
|
||||||
0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74,
|
|
||||||
0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69,
|
|
||||||
0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b,
|
|
||||||
0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c,
|
|
||||||
0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
|
|
||||||
0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a,
|
|
||||||
0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
|
|
||||||
0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72,
|
|
||||||
0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72,
|
|
||||||
0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62,
|
|
||||||
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01,
|
|
||||||
0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e,
|
|
||||||
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f,
|
|
||||||
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
|
|
||||||
0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
|
|
||||||
0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d,
|
|
||||||
0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65,
|
|
||||||
0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65,
|
|
||||||
0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73,
|
|
||||||
0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72,
|
|
||||||
0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61,
|
|
||||||
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
|
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
|
||||||
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
|
0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
|
||||||
0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75,
|
0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52,
|
||||||
0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61,
|
0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72,
|
||||||
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
|
0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64,
|
||||||
0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c,
|
0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74,
|
||||||
0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74,
|
0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74,
|
||||||
0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
|
0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65,
|
||||||
0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
|
0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09,
|
||||||
0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67,
|
0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
|
||||||
0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49,
|
0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xe4,
|
||||||
0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
|
0x01, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65,
|
||||||
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68,
|
||||||
|
0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12,
|
||||||
|
0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05,
|
||||||
|
0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20,
|
||||||
|
0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65,
|
||||||
|
0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a,
|
||||||
|
0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65,
|
||||||
|
0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72,
|
||||||
|
0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69,
|
||||||
|
0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65,
|
||||||
|
0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20,
|
||||||
|
0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f,
|
||||||
|
0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09,
|
||||||
|
0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x48, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75,
|
||||||
|
0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28,
|
||||||
|
0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
|
||||||
|
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a,
|
||||||
|
0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x32,
|
||||||
|
0xeb, 0x03, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48,
|
||||||
|
0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
|
||||||
|
0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e,
|
||||||
|
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12,
|
||||||
|
0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63,
|
||||||
|
0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69,
|
||||||
|
0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
|
||||||
|
0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64,
|
||||||
|
0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64,
|
||||||
|
0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b,
|
||||||
|
0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d,
|
||||||
|
0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e,
|
||||||
|
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f,
|
||||||
|
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
|
||||||
|
0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d,
|
||||||
|
0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
|
||||||
|
0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73,
|
||||||
|
0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64,
|
||||||
|
0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d,
|
||||||
|
0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e,
|
||||||
|
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65,
|
||||||
|
0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62,
|
||||||
|
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12,
|
||||||
|
0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
|
||||||
|
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
|
||||||
|
0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73,
|
||||||
|
0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e,
|
||||||
|
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d,
|
||||||
|
0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
|
||||||
|
0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63,
|
||||||
|
0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a,
|
||||||
|
0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
|
||||||
|
0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61,
|
||||||
|
0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69,
|
||||||
|
0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e,
|
||||||
|
0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67,
|
||||||
|
0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||||
|
0x33,
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
@ -89,6 +89,12 @@ message ModelOptions {
|
||||||
float RopeFreqScale = 18;
|
float RopeFreqScale = 18;
|
||||||
float RMSNormEps = 19;
|
float RMSNormEps = 19;
|
||||||
int32 NGQA = 20;
|
int32 NGQA = 20;
|
||||||
|
string ModelFile = 21;
|
||||||
|
|
||||||
|
// AutoGPTQ
|
||||||
|
string Device = 22;
|
||||||
|
bool UseTriton = 23;
|
||||||
|
string ModelBaseName = 24;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
|
|
|
@ -17,7 +17,7 @@ type Whisper struct {
|
||||||
|
|
||||||
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
w, err := whisper.New(opts.Model)
|
w, err := whisper.New(opts.ModelFile)
|
||||||
sd.whisper = w
|
sd.whisper = w
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,8 +18,8 @@ type Piper struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sd *Piper) Load(opts *pb.ModelOptions) error {
|
func (sd *Piper) Load(opts *pb.ModelOptions) error {
|
||||||
if filepath.Ext(opts.Model) != ".onnx" {
|
if filepath.Ext(opts.ModelFile) != ".onnx" {
|
||||||
return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.Model)
|
return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.ModelFile)
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
|
|
|
@ -124,8 +124,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
||||||
|
|
||||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||||
// It also loads the model
|
// It also loads the model
|
||||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
|
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
|
||||||
return func(s string) (*grpc.Client, error) {
|
return func(modelName, modelFile string) (*grpc.Client, error) {
|
||||||
log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
|
log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
|
||||||
|
|
||||||
var client *grpc.Client
|
var client *grpc.Client
|
||||||
|
@ -148,7 +148,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
|
||||||
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||||
}
|
}
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := ml.startProcess(uri, o.modelFile, serverAddress); err != nil {
|
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,7 +172,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := ml.startProcess(grpcProcess, o.modelFile, serverAddress); err != nil {
|
if err := ml.startProcess(grpcProcess, o.model, serverAddress); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,7 +198,8 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
|
||||||
}
|
}
|
||||||
|
|
||||||
options := *o.gRPCOptions
|
options := *o.gRPCOptions
|
||||||
options.Model = s
|
options.Model = modelName
|
||||||
|
options.ModelFile = modelFile
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
|
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
|
||||||
|
|
||||||
|
@ -217,14 +218,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
|
||||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
|
func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
|
||||||
o := NewOptions(opts...)
|
o := NewOptions(opts...)
|
||||||
|
|
||||||
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
|
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.model)
|
||||||
|
|
||||||
backend := strings.ToLower(o.backendString)
|
backend := strings.ToLower(o.backendString)
|
||||||
|
|
||||||
// if an external backend is provided, use it
|
// if an external backend is provided, use it
|
||||||
_, externalBackendExists := o.externalBackends[backend]
|
_, externalBackendExists := o.externalBackends[backend]
|
||||||
if externalBackendExists {
|
if externalBackendExists {
|
||||||
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
|
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
|
||||||
}
|
}
|
||||||
|
|
||||||
switch backend {
|
switch backend {
|
||||||
|
@ -232,13 +233,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
|
||||||
MPTBackend, Gpt2Backend, FalconBackend,
|
MPTBackend, Gpt2Backend, FalconBackend,
|
||||||
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
|
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
|
||||||
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
|
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
|
||||||
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
|
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
|
||||||
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
|
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
|
||||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
|
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
|
||||||
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o))
|
return ml.LoadModel(o.model, ml.grpcModel(Gpt4All, o))
|
||||||
case PiperBackend:
|
case PiperBackend:
|
||||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
|
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
|
||||||
return ml.LoadModel(o.modelFile, ml.grpcModel(PiperBackend, o))
|
return ml.LoadModel(o.model, ml.grpcModel(PiperBackend, o))
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
|
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
|
||||||
}
|
}
|
||||||
|
@ -249,8 +250,8 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||||
|
|
||||||
// Is this really needed? BackendLoader already does this
|
// Is this really needed? BackendLoader already does this
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
if m := ml.checkIsLoaded(o.modelFile); m != nil {
|
if m := ml.checkIsLoaded(o.model); m != nil {
|
||||||
log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
|
log.Debug().Msgf("Model '%s' already loaded", o.model)
|
||||||
ml.mu.Unlock()
|
ml.mu.Unlock()
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
@ -263,13 +264,13 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||||
for _, b := range o.externalBackends {
|
for _, b := range o.externalBackends {
|
||||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
|
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
|
||||||
}
|
}
|
||||||
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.modelFile, strings.Join(allBackendsToAutoLoad, ", "))
|
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
|
||||||
|
|
||||||
for _, b := range allBackendsToAutoLoad {
|
for _, b := range allBackendsToAutoLoad {
|
||||||
log.Debug().Msgf("[%s] Attempting to load", b)
|
log.Debug().Msgf("[%s] Attempting to load", b)
|
||||||
options := []Option{
|
options := []Option{
|
||||||
WithBackendString(b),
|
WithBackendString(b),
|
||||||
WithModelFile(o.modelFile),
|
WithModel(o.model),
|
||||||
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
|
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
|
||||||
WithThreads(o.threads),
|
WithThreads(o.threads),
|
||||||
WithAssetDir(o.assetDir),
|
WithAssetDir(o.assetDir),
|
||||||
|
|
|
@ -98,7 +98,7 @@ func (ml *ModelLoader) ListModels() ([]string, error) {
|
||||||
return models, nil
|
return models, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
|
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*grpc.Client, error)) (*grpc.Client, error) {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Cl
|
||||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||||
|
|
||||||
model, err := loader(modelFile)
|
model, err := loader(modelName, modelFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,7 @@ import (
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
backendString string
|
backendString string
|
||||||
modelFile string
|
model string
|
||||||
threads uint32
|
threads uint32
|
||||||
assetDir string
|
assetDir string
|
||||||
context context.Context
|
context context.Context
|
||||||
|
@ -35,9 +35,9 @@ func WithBackendString(backend string) Option {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithModelFile(modelFile string) Option {
|
func WithModel(modelFile string) Option {
|
||||||
return func(o *Options) {
|
return func(o *Options) {
|
||||||
o.modelFile = modelFile
|
o.model = modelFile
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue