mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-21 11:04:59 +00:00
wip
This commit is contained in:
parent
f0e265a96d
commit
78ef045bb3
21 changed files with 485 additions and 336 deletions
|
@ -26,7 +26,7 @@ type TokenUsage struct {
|
||||||
Completion int
|
Completion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(c)
|
grpcOpts := gRPCModelOpts(c)
|
||||||
|
@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
||||||
fn := func() (LLMResponse, error) {
|
fn := func() (LLMResponse, error) {
|
||||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||||
opts.Prompt = s
|
opts.Prompt = s
|
||||||
|
opts.Images = images
|
||||||
|
|
||||||
tokenUsage := TokenUsage{}
|
tokenUsage := TokenUsage{}
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
||||||
DraftModel: c.DraftModel,
|
DraftModel: c.DraftModel,
|
||||||
AudioPath: c.VallE.AudioPath,
|
AudioPath: c.VallE.AudioPath,
|
||||||
Quantization: c.Quantization,
|
Quantization: c.Quantization,
|
||||||
|
MMProj: c.MMProj,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
NGQA: c.NGQA,
|
NGQA: c.NGQA,
|
||||||
|
|
|
@ -104,6 +104,7 @@ type LLMConfig struct {
|
||||||
DraftModel string `yaml:"draft_model"`
|
DraftModel string `yaml:"draft_model"`
|
||||||
NDraft int32 `yaml:"n_draft"`
|
NDraft int32 `yaml:"n_draft"`
|
||||||
Quantization string `yaml:"quantization"`
|
Quantization string `yaml:"quantization"`
|
||||||
|
MMProj string `yaml:"mmproj"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type AutoGPTQ struct {
|
type AutoGPTQ struct {
|
||||||
|
|
|
@ -81,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
// process functions if we have any defined or if we have a function call string
|
// process functions if we have any defined or if we have a function call string
|
||||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||||
log.Debug().Msgf("Response needs to process functions")
|
log.Debug().Msgf("Response needs to process functions")
|
||||||
|
@ -140,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r := config.Roles[role]
|
r := config.Roles[role]
|
||||||
contentExists := i.Content != nil && *i.Content != ""
|
contentExists := i.Content != nil && i.StringContent != ""
|
||||||
// First attempt to populate content via a chat message specific template
|
// First attempt to populate content via a chat message specific template
|
||||||
if config.TemplateConfig.ChatMessage != "" {
|
if config.TemplateConfig.ChatMessage != "" {
|
||||||
chatMessageData := model.ChatMessageTemplateData{
|
chatMessageData := model.ChatMessageTemplateData{
|
||||||
SystemPrompt: config.SystemPrompt,
|
SystemPrompt: config.SystemPrompt,
|
||||||
Role: r,
|
Role: r,
|
||||||
RoleName: role,
|
RoleName: role,
|
||||||
Content: *i.Content,
|
Content: i.StringContent,
|
||||||
MessageIndex: messageIndex,
|
MessageIndex: messageIndex,
|
||||||
}
|
}
|
||||||
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||||
|
@ -166,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||||
if content == "" {
|
if content == "" {
|
||||||
if r != "" {
|
if r != "" {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(r, " ", *i.Content)
|
content = fmt.Sprint(r, " ", i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
|
@ -180,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(*i.Content)
|
content = fmt.Sprint(i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
|
@ -334,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||||
// Note: This costs (in term of CPU) another computation
|
// Note: This costs (in term of CPU) another computation
|
||||||
config.Grammar = ""
|
config.Grammar = ""
|
||||||
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
|
images := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
images = append(images, m.StringImages...)
|
||||||
|
}
|
||||||
|
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Msgf("inference error: %s", err.Error())
|
log.Error().Msgf("inference error: %s", err.Error())
|
||||||
return
|
return
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
@ -64,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
if input.Stream {
|
if input.Stream {
|
||||||
|
|
|
@ -23,8 +23,13 @@ func ComputeChoices(
|
||||||
n = 1
|
n = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
images := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
images = append(images, m.StringImages...)
|
||||||
|
}
|
||||||
|
|
||||||
// get the model function to call for the result
|
// get the model function to call for the result
|
||||||
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
|
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, backend.TokenUsage{}, err
|
return result, backend.TokenUsage{}, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,11 @@ package openai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
||||||
return modelFile, input, nil
|
return modelFile, input, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getBase64Image returns the base64 payload of the image referenced by s.
//
// Two input forms are accepted:
//   - an http:// or https:// URL: the image is downloaded into memory and
//     encoded with standard (padded) base64;
//   - a base64 data URI of the form "data:image/<subtype>[;params];base64,<payload>":
//     the header is dropped and the payload is returned unchanged (it is not
//     validated as base64).
//
// Any other input, a failed download, or a non-2xx HTTP response yields an
// error and an empty string.
//
// NOTE(review): the download uses the default HTTP client (no timeout) and
// reads the whole body into memory; the URL comes from the request, so a
// bounded client/reader should be considered before this leaves WIP.
func getBase64Image(s string) (string, error) {
	if strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://") {
		// download the image
		resp, err := http.Get(s)
		if err != nil {
			return "", err
		}
		defer resp.Body.Close()

		// An error page must not be passed on as if it were image data.
		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
			return "", fmt.Errorf("downloading image: unexpected status %q", resp.Status)
		}

		// read the image data into memory
		data, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return "", err
		}

		// encode the image data in base64
		return base64.StdEncoding.EncodeToString(data), nil
	}

	// Data URI: accept any image subtype (jpeg, png, webp, ...) and strip
	// everything up to and including the ";base64," marker.
	const marker = ";base64,"
	if strings.HasPrefix(s, "data:image/") {
		if i := strings.Index(s, marker); i >= 0 {
			return s[i+len(marker):], nil
		}
	}

	return "", fmt.Errorf("not valid string")
}
|
||||||
|
|
||||||
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||||
if input.Echo {
|
if input.Echo {
|
||||||
config.Echo = input.Echo
|
config.Echo = input.Echo
|
||||||
|
@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode each request's message content
|
||||||
|
index := 0
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
switch content := m.Content.(type) {
|
||||||
|
case string:
|
||||||
|
m.StringContent = content
|
||||||
|
case []interface{}:
|
||||||
|
dat, _ := json.Marshal(content)
|
||||||
|
c := []schema.Content{}
|
||||||
|
json.Unmarshal(dat, &c)
|
||||||
|
for _, pp := range c {
|
||||||
|
if pp.Type == "text" {
|
||||||
|
m.StringContent = pp.Text
|
||||||
|
} else if pp.Type == "image_url" {
|
||||||
|
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
|
||||||
|
base64, err := getBase64Image(pp.ImageURL)
|
||||||
|
if err == nil {
|
||||||
|
m.StringImages = append(m.StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
m.StringContent = m.StringContent + fmt.Sprintf("[img-%d]", index)
|
||||||
|
index++
|
||||||
|
} else {
|
||||||
|
fmt.Print("Failed encoding image", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if input.RepeatPenalty != 0 {
|
if input.RepeatPenalty != 0 {
|
||||||
config.RepeatPenalty = input.RepeatPenalty
|
config.RepeatPenalty = input.RepeatPenalty
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,11 +55,21 @@ type Choice struct {
|
||||||
Text string `json:"text,omitempty"`
|
Text string `json:"text,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Content is one typed part of a message whose content is sent as an array
// rather than a plain string. updateConfig decodes such arrays into []Content
// and dispatches on Type.
// NOTE(review): ImageURL is decoded as a plain string here; confirm this
// matches the shape clients actually send for "image_url" parts.
type Content struct {
|
||||||
|
// Type discriminates the part; updateConfig handles "text" and "image_url".
Type string `json:"type" yaml:"type"`
|
||||||
|
// Text is the text of the part; read when Type is "text".
Text string `json:"text" yaml:"text"`
|
||||||
|
// ImageURL is read when Type is "image_url" and is passed to getBase64Image.
ImageURL string `json:"image_url" yaml:"image_url"`
|
||||||
|
}
|
||||||
|
|
||||||
type Message struct {
|
type Message struct {
|
||||||
// The message role
|
// The message role
|
||||||
Role string `json:"role,omitempty" yaml:"role"`
|
Role string `json:"role,omitempty" yaml:"role"`
|
||||||
// The message content
|
// The message content
|
||||||
Content *string `json:"content" yaml:"content"`
|
Content interface{} `json:"content" yaml:"content"`
|
||||||
|
|
||||||
|
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||||
|
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||||
|
|
||||||
// A result of a function call
|
// A result of a function call
|
||||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
// llama.cpp gRPC C++ backend server
|
// llama.cpp gRPC C++ backend server
|
||||||
//
|
//
|
||||||
// Ettore Di Giacinto <mudler@localai.io>
|
// Ettore Di Giacinto <mudler@localai.io> and llama.cpp authors
|
||||||
//
|
//
|
||||||
// This is a gRPC server for llama.cpp compatible with the LocalAI proto
|
// This is a gRPC server for llama.cpp compatible with the LocalAI proto
|
||||||
// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP,
|
// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP (https://github.com/ggerganov/llama.cpp/tree/master/examples/server),
|
||||||
// but modified to work with gRPC
|
// but modified to work with gRPC
|
||||||
//
|
//
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ using grpc::Status;
|
||||||
using backend::HealthMessage;
|
using backend::HealthMessage;
|
||||||
|
|
||||||
|
|
||||||
///// LLAMA.CPP server
|
///// LLAMA.CPP server code below
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
@ -1809,7 +1809,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
|
||||||
|
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
//////// LOCALAI
|
//////// LOCALAI code starts below here
|
||||||
|
/////////////////////////////////
|
||||||
|
////////////////////////////////
|
||||||
|
|
||||||
bool loaded_model; // TODO: add a mutex for this, but happens only once loading the model
|
bool loaded_model; // TODO: add a mutex for this, but happens only once loading the model
|
||||||
|
|
||||||
|
@ -1880,6 +1882,16 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||||
data["prompt"] = predict->prompt();
|
data["prompt"] = predict->prompt();
|
||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
|
|
||||||
|
// for each image in the request, add the image data
|
||||||
|
//
|
||||||
|
for (int i = 0; i < predict->images_size(); i++) {
|
||||||
|
data["image_data"].push_back(json
|
||||||
|
{
|
||||||
|
{"id", i},
|
||||||
|
{"data", predict->images(i)},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
data["stop"] = predict->stopprompts();
|
data["stop"] = predict->stopprompts();
|
||||||
// data["n_probs"] = predict->nprobs();
|
// data["n_probs"] = predict->nprobs();
|
||||||
//TODO: images,
|
//TODO: images,
|
||||||
|
@ -1953,14 +1965,17 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void params_parse(const backend::ModelOptions* request,
|
static void params_parse(const backend::ModelOptions* request,
|
||||||
gpt_params & params) {
|
gpt_params & params) {
|
||||||
|
|
||||||
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
||||||
|
|
||||||
params.model = request->modelfile();
|
params.model = request->modelfile();
|
||||||
|
if (!request->mmproj().empty()) {
|
||||||
|
// get the directory of modelfile
|
||||||
|
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||||
|
params.mmproj = model_dir + request->mmproj();
|
||||||
|
}
|
||||||
// params.model_alias ??
|
// params.model_alias ??
|
||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
|
@ -2071,16 +2086,6 @@ public:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return grpc::Status::OK;
|
|
||||||
|
|
||||||
|
|
||||||
// auto on_complete = [task_id, &llama] (bool)
|
|
||||||
// {
|
|
||||||
// // cancel
|
|
||||||
// llama.request_cancel(task_id);
|
|
||||||
// };
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -10,6 +10,33 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// JSONBNF is a BNF-style grammar whose root rule is a single JSON object
// (with nested values, arrays, strings and numbers defined below it).
// The chat and completion endpoints assign it to the request's Grammar
// when the request's ResponseFormat is "json_object".
JSONBNF = `root ::= object
|
||||||
|
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||||
|

||||||
|
object ::=
|
||||||
|
"{" ws (
|
||||||
|
string ":" ws value
|
||||||
|
("," ws string ":" ws value)*
|
||||||
|
)? "}" ws
|
||||||
|

||||||
|
array ::=
|
||||||
|
"[" ws (
|
||||||
|
value
|
||||||
|
("," ws value)*
|
||||||
|
)? "]" ws
|
||||||
|

||||||
|
string ::=
|
||||||
|
"\"" (
|
||||||
|
[^"\\] |
|
||||||
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||||
|
)* "\"" ws
|
||||||
|

||||||
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||||
|

||||||
|
ws ::= ([ \t\n] ws)?`
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
SPACE_RULE = `" "?`
|
SPACE_RULE = `" "?`
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||||
// versions:
|
// versions:
|
||||||
// protoc-gen-go v1.26.0
|
// protoc-gen-go v1.26.0
|
||||||
// protoc v3.15.8
|
// protoc v4.23.4
|
||||||
// source: pkg/grpc/proto/backend.proto
|
// source: pkg/grpc/proto/backend.proto
|
||||||
|
|
||||||
package proto
|
package proto
|
||||||
|
@ -156,6 +156,7 @@ type PredictOptions struct {
|
||||||
NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"`
|
NegativePromptScale float32 `protobuf:"fixed32,39,opt,name=NegativePromptScale,proto3" json:"NegativePromptScale,omitempty"`
|
||||||
NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"`
|
NegativePrompt string `protobuf:"bytes,40,opt,name=NegativePrompt,proto3" json:"NegativePrompt,omitempty"`
|
||||||
NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"`
|
NDraft int32 `protobuf:"varint,41,opt,name=NDraft,proto3" json:"NDraft,omitempty"`
|
||||||
|
Images []string `protobuf:"bytes,42,rep,name=Images,proto3" json:"Images,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *PredictOptions) Reset() {
|
func (x *PredictOptions) Reset() {
|
||||||
|
@ -470,6 +471,13 @@ func (x *PredictOptions) GetNDraft() int32 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (x *PredictOptions) GetImages() []string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Images
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
type Reply struct {
|
type Reply struct {
|
||||||
state protoimpl.MessageState
|
state protoimpl.MessageState
|
||||||
|
@ -561,12 +569,14 @@ type ModelOptions struct {
|
||||||
// RWKV
|
// RWKV
|
||||||
Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"`
|
Tokenizer string `protobuf:"bytes,34,opt,name=Tokenizer,proto3" json:"Tokenizer,omitempty"`
|
||||||
// LLM (llama.cpp)
|
// LLM (llama.cpp)
|
||||||
LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"`
|
LoraBase string `protobuf:"bytes,35,opt,name=LoraBase,proto3" json:"LoraBase,omitempty"`
|
||||||
LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"`
|
LoraAdapter string `protobuf:"bytes,36,opt,name=LoraAdapter,proto3" json:"LoraAdapter,omitempty"`
|
||||||
NoMulMatQ bool `protobuf:"varint,37,opt,name=NoMulMatQ,proto3" json:"NoMulMatQ,omitempty"`
|
NoMulMatQ bool `protobuf:"varint,37,opt,name=NoMulMatQ,proto3" json:"NoMulMatQ,omitempty"`
|
||||||
DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"`
|
DraftModel string `protobuf:"bytes,39,opt,name=DraftModel,proto3" json:"DraftModel,omitempty"`
|
||||||
AudioPath string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"`
|
AudioPath string `protobuf:"bytes,38,opt,name=AudioPath,proto3" json:"AudioPath,omitempty"`
|
||||||
|
// vllm
|
||||||
Quantization string `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"`
|
Quantization string `protobuf:"bytes,40,opt,name=Quantization,proto3" json:"Quantization,omitempty"`
|
||||||
|
MMProj string `protobuf:"bytes,41,opt,name=MMProj,proto3" json:"MMProj,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *ModelOptions) Reset() {
|
func (x *ModelOptions) Reset() {
|
||||||
|
@ -881,6 +891,13 @@ func (x *ModelOptions) GetQuantization() string {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (x *ModelOptions) GetMMProj() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.MMProj
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
type Result struct {
|
type Result struct {
|
||||||
state protoimpl.MessageState
|
state protoimpl.MessageState
|
||||||
sizeCache protoimpl.SizeCache
|
sizeCache protoimpl.SizeCache
|
||||||
|
@ -1542,7 +1559,7 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
|
||||||
0x0a, 0x1c, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
0x0a, 0x1c, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||||
0x2f, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x07,
|
0x2f, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x07,
|
||||||
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74,
|
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74,
|
||||||
0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xdc, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65,
|
0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xf4, 0x09, 0x0a, 0x0e, 0x50, 0x72, 0x65,
|
||||||
0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50,
|
0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50,
|
||||||
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f,
|
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f,
|
||||||
0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28,
|
0x6d, 0x70, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28,
|
||||||
|
@ -1620,85 +1637,88 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
|
||||||
0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52,
|
0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52,
|
||||||
0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12,
|
0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12,
|
||||||
0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52,
|
0x16, 0x0a, 0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||||
0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79,
|
0x06, 0x4e, 0x44, 0x72, 0x61, 0x66, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65,
|
||||||
0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
|
0x73, 0x18, 0x2a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x22,
|
||||||
0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xbe, 0x09, 0x0a, 0x0c, 0x4d,
|
0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
|
||||||
0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d,
|
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
|
||||||
0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65,
|
0x67, 0x65, 0x22, 0xd6, 0x09, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69,
|
||||||
0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65,
|
0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01,
|
||||||
0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53,
|
0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e,
|
||||||
0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28,
|
0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b,
|
||||||
0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63,
|
0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53,
|
||||||
0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12,
|
0x65, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12,
|
||||||
0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01,
|
0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||||
0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a,
|
0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65,
|
||||||
0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c,
|
0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d,
|
||||||
0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28,
|
0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06,
|
||||||
0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62,
|
0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d,
|
||||||
0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61,
|
0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12,
|
||||||
0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d,
|
0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01,
|
||||||
0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12,
|
0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a,
|
||||||
0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20,
|
0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07,
|
||||||
0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12,
|
0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, 0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64,
|
||||||
0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e,
|
0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62,
|
||||||
0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72,
|
0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18,
|
||||||
0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79,
|
0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e,
|
||||||
0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d,
|
0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a,
|
0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d,
|
||||||
0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01,
|
0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61,
|
||||||
0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12,
|
0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53,
|
||||||
0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05,
|
0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73,
|
||||||
0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62,
|
0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61,
|
||||||
0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10,
|
0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64,
|
||||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61,
|
0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72,
|
||||||
0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46,
|
0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69,
|
||||||
0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52,
|
0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x12,
|
||||||
0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52,
|
0x22, 0x0a, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42, 0x61, 0x73, 0x65, 0x18,
|
||||||
0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01,
|
0x11, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0c, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x42,
|
||||||
0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c,
|
0x61, 0x73, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x52, 0x6f, 0x70, 0x65, 0x46, 0x72, 0x65, 0x71, 0x53,
|
||||||
0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18,
|
0x63, 0x61, 0x6c, 0x65, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0d, 0x52, 0x6f, 0x70, 0x65,
|
||||||
0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70,
|
0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53,
|
||||||
0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52,
|
0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52,
|
||||||
0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69,
|
0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51,
|
||||||
0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46,
|
0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a,
|
||||||
0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20,
|
0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09,
|
||||||
0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76, 0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55,
|
0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44,
|
||||||
0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09,
|
0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76,
|
||||||
0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64,
|
0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e,
|
||||||
0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09,
|
0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f,
|
||||||
0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12,
|
0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61,
|
||||||
0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
|
0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42,
|
||||||
0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61,
|
0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61,
|
||||||
0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50,
|
0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28,
|
||||||
0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28,
|
0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
|
||||||
0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12,
|
0x7a, 0x65, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x54,
|
||||||
0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65,
|
0x79, 0x70, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x50, 0x69, 0x70, 0x65, 0x6c,
|
||||||
0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65,
|
0x69, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x64,
|
||||||
0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20,
|
0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d,
|
||||||
0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44, 0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47,
|
0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a,
|
||||||
0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47,
|
0x04, 0x43, 0x55, 0x44, 0x41, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x43, 0x55, 0x44,
|
||||||
0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47,
|
0x41, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x18, 0x1d, 0x20,
|
||||||
0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12,
|
0x01, 0x28, 0x02, 0x52, 0x08, 0x43, 0x46, 0x47, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x18, 0x0a,
|
||||||
0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01,
|
0x07, 0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07,
|
||||||
0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a,
|
0x49, 0x4d, 0x47, 0x32, 0x49, 0x4d, 0x47, 0x12, 0x1c, 0x0a, 0x09, 0x43, 0x4c, 0x49, 0x50, 0x4d,
|
||||||
0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20,
|
0x6f, 0x64, 0x65, 0x6c, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x43, 0x4c, 0x49, 0x50,
|
||||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c,
|
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x75, 0x62,
|
||||||
0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18,
|
0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x18, 0x20, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x43, 0x4c,
|
||||||
0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43, 0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12,
|
0x49, 0x50, 0x53, 0x75, 0x62, 0x66, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x43,
|
||||||
0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01,
|
0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x43,
|
||||||
0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a,
|
0x4c, 0x49, 0x50, 0x53, 0x6b, 0x69, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x54, 0x6f, 0x6b, 0x65, 0x6e,
|
||||||
0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52,
|
0x69, 0x7a, 0x65, 0x72, 0x18, 0x22, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x54, 0x6f, 0x6b, 0x65,
|
||||||
0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72,
|
0x6e, 0x69, 0x7a, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73,
|
||||||
0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b,
|
0x65, 0x18, 0x23, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x4c, 0x6f, 0x72, 0x61, 0x42, 0x61, 0x73,
|
||||||
0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4e,
|
0x65, 0x12, 0x20, 0x0a, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70, 0x74, 0x65, 0x72,
|
||||||
0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09,
|
0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x4c, 0x6f, 0x72, 0x61, 0x41, 0x64, 0x61, 0x70,
|
||||||
0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61,
|
0x74, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74, 0x51,
|
||||||
0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44,
|
0x18, 0x25, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x4d, 0x75, 0x6c, 0x4d, 0x61, 0x74,
|
||||||
0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64,
|
0x51, 0x12, 0x1e, 0x0a, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18,
|
||||||
0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75,
|
0x27, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x44, 0x72, 0x61, 0x66, 0x74, 0x4d, 0x6f, 0x64, 0x65,
|
||||||
0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12, 0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74,
|
0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x18, 0x26,
|
||||||
0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51,
|
0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x12,
|
||||||
0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x3c, 0x0a, 0x06, 0x52,
|
0x22, 0x0a, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18,
|
||||||
|
0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x69, 0x7a, 0x61, 0x74,
|
||||||
|
0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x18, 0x29, 0x20,
|
||||||
|
0x01, 0x28, 0x09, 0x52, 0x06, 0x4d, 0x4d, 0x50, 0x72, 0x6f, 0x6a, 0x22, 0x3c, 0x0a, 0x06, 0x52,
|
||||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65,
|
0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65,
|
||||||
0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12,
|
0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12,
|
||||||
0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08,
|
0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08,
|
||||||
|
|
|
@ -64,6 +64,7 @@ message PredictOptions {
|
||||||
float NegativePromptScale = 39;
|
float NegativePromptScale = 39;
|
||||||
string NegativePrompt = 40;
|
string NegativePrompt = 40;
|
||||||
int32 NDraft = 41;
|
int32 NDraft = 41;
|
||||||
|
repeated string Images = 42;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
|
@ -123,6 +124,8 @@ message ModelOptions {
|
||||||
|
|
||||||
// vllm
|
// vllm
|
||||||
string Quantization = 40;
|
string Quantization = 40;
|
||||||
|
|
||||||
|
string MMProj = 41;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||||
// versions:
|
// versions:
|
||||||
// - protoc-gen-go-grpc v1.2.0
|
// - protoc-gen-go-grpc v1.2.0
|
||||||
// - protoc v3.15.8
|
// - protoc v4.23.4
|
||||||
// source: pkg/grpc/proto/backend.proto
|
// source: pkg/grpc/proto/backend.proto
|
||||||
|
|
||||||
package proto
|
package proto
|
||||||
|
|
|
@ -63,7 +63,7 @@ var AutoLoadBackends []string = []string{
|
||||||
// It also loads the model
|
// It also loads the model
|
||||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
|
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
|
||||||
return func(modelName, modelFile string) (*grpc.Client, error) {
|
return func(modelName, modelFile string) (*grpc.Client, error) {
|
||||||
log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
|
log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelName, modelFile, backend, *o)
|
||||||
|
|
||||||
var client *grpc.Client
|
var client *grpc.Client
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue